-
Notifications
You must be signed in to change notification settings - Fork 18
/
variants_pipeline_rnaseq_wrapper.pl
executable file
·162 lines (99 loc) · 5.79 KB
/
variants_pipeline_rnaseq_wrapper.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/usr/bin/perl
use strict;
use Getopt::Long qw(GetOptions);
use FindBin qw($Bin);
use lib "$Bin/lib";
use Schedule;
use Cluster;
# Command-line state.
# These have no default. All except $config and $scheduler are filled in by
# GetOptions below; $scheduler is derived from the environment further down.
my ($map, $group, $pair, $config, $help, $species, $scheduler, $targets, $request, $strand, $rnaseq_pipeline);

# Optional settings and their defaults; each is overridable from the command line.
my $pre = 'TEMP';
my $output = "results";
my $priority_project = "ngs";
my $priority_group = "Pipeline";

# Login name of the invoking user; it seeds the per-user defaults below.
my $uID = `/usr/bin/id -u -n`;
chomp $uID;
my $email = "$uID\@cbio.mskcc.org";
my $rsync = "/juno/res/bic/$uID";
my $tempdir = "/scratch/$uID";

# Option name => destination. Every value-taking option needs the "=s" suffix:
# without it Getopt::Long treats the option as a boolean flag, stores 1 and
# leaves the intended value behind in @ARGV.
my @option_spec = (
    'map=s'              => \$map,
    'group=s'            => \$group,
    'pair=s'             => \$pair,
    'pre=s'              => \$pre,
    'strand=s'           => \$strand,
    'targets=s'          => \$targets,
    'request=s'          => \$request,
    'species=s'          => \$species,
    'help'               => \$help,
    'output|out|o=s'     => \$output,
    'rsync=s'            => \$rsync,
    'priority_project=s' => \$priority_project,
    'priority_group=s'   => \$priority_group,
    'email=s'            => \$email,
    'tempdir=s'          => \$tempdir,
    'rnaseq_pipeline=s'  => \$rnaseq_pipeline,
);

# Getopt::Long reports unknown or malformed options itself; just stop with a failure status.
GetOptions(@option_spec) or exit(1);
# Print usage and stop when -help is given or any required option is missing.
# The heredoc interpolates $Bin and $uID so the defaults shown are the real ones.
if(!$map || !$group || !$pair || !$species || !$request || !$targets || !$strand || !$rnaseq_pipeline || $help){
print <<HELP;

USAGE: variants_pipeline_rnaseq_wrapper.pl -map MAP -group GROUP -pair PAIR -species SPECIES -targets TARGETS -request REQUEST -strand STRAND -rnaseq_pipeline RNASEQ_PIPELINE [options]
	* MAP: file listing sample information for processing (REQUIRED)
	* GROUP: file listing grouping of samples for realign/recal steps (REQUIRED)
	* PAIR: file listing tumor/normal pairing of samples for mutect/maf conversion (REQUIRED)
	* SPECIES: b37 (default: human), mm9, mm10 (default: mouse), hybrid (b37+mm10), mm10_custom, species_custom and dm6 currently supported (REQUIRED)
	* TARGETS: name of targets assay; will search for targets/baits ilists and targets padded file in $Bin/targets/TARGETS unless given full path to targets directory (REQUIRED)
	* REQUEST: file containing request information such as PI, investigator, etc. (REQUIRED)
	* STRAND: strand setting handed to rnaseq_pipeline.pl as -strand (REQUIRED)
	* RNASEQ_PIPELINE: path to the rnaseq pipeline repository (REQUIRED)
	* EMAIL: email to send notification of finished final job of pipeline (default: $uID\@cbio.mskcc.org)
	* PRE: output prefix (default: TEMP)
	* OUTPUT: output results directory (default: results)
	* RSYNC: path to rsync data for archive (default: /juno/res/bic/$uID)
	* TEMPDIR: temp directory (default: /scratch/$uID)
	* PRIORITY_PROJECT: sge notion of priority assigned to projects (default: ngs)
	* PRIORITY_GROUP: lsf notion of priority assigned to groups (default: Pipeline)

HELP
# An explicit -help is a successful run; a missing required option is a usage error.
exit($help ? 0 : 1);
}
# Decide which scheduler to submit to from the environment:
#   - a recognised LSF_ENVDIR selects luna or juno,
#   - otherwise a non-empty SGE_ROOT selects sge,
#   - anything else is fatal.
my %scheduler_for_lsf_envdir = (
    "/common/lsf/conf"        => 'luna',
    "/common/juno/OS7/conf"   => 'juno',
    "/admin/lsfjuno/lsf/conf" => 'juno',
);

# Unset variables are treated as empty strings, which match none of the known paths.
my $lsf_envdir = defined $ENV{'LSF_ENVDIR'} ? $ENV{'LSF_ENVDIR'} : "";
my $sge_root   = defined $ENV{'SGE_ROOT'}   ? $ENV{'SGE_ROOT'}   : "";

if(exists $scheduler_for_lsf_envdir{$lsf_envdir}){
    $scheduler = $scheduler_for_lsf_envdir{$lsf_envdir};
}
elsif($sge_root ne ""){
    $scheduler = 'sge';
}
else{
    die "unrecognized scheduler, valid scheduler [sge, luna, juno]";
}
# Directory the wrapper was started from, as reported by the shell's pwd.
chomp(my $curDir = `pwd`);

# Same path with every "/" replaced by "_".
(my $cd = $curDir) =~ tr{/}{_};

# A relative output directory is anchored at the starting directory;
# an absolute one is used as given.
$output = join('/', $curDir, $output) unless $output =~ m{^/};

# RNA-seq results get their own subdirectory under the output directory.
my $rna_output = join('/', $output, 'rna');
# Fail fast when any required input is missing, before anything is created or submitted.
die "Can't find mapping file $map\n" if(!-e $map);
die "Can't find pairing file $pair\n" if(!-e $pair);
die "Can't find grouping file $group\n" if(!-e $group);
die "Can't find request file $request\n" if(!-e $request);
die "Can't find rsync directory $rsync\n" if(!-d $rsync);
die "Can't find rnaseq pipeline script $rnaseq_pipeline/rnaseq_pipeline.pl\n" if(!-e "$rnaseq_pipeline/rnaseq_pipeline.pl");

# Create whichever working directories do not exist yet. Each one is checked
# on its own so that a pre-existing output directory still gets its progress/
# subdirectory, which is where the cluster logs of the submitted jobs go.
# Parents come before children because mkdir does not create intermediate
# directories.
foreach my $dir ($output, "$output/progress", $rna_output, $tempdir){
    next if -d $dir;
    mkdir($dir, 0775) or die "Can't make $dir: $!";
}
# Run one scheduler submission.
#   $queuing - hash ref returned by Schedule::queuing (submit, job_name, job_hold, cpu, mem, cluster_out)
#   $extra   - string returned by Schedule::additionalParams
#   $command - the command line the scheduler should run
# Dies when the submit command exits with a non-zero status, so the wrapper
# does not carry on as if the job had been queued.
sub _submit_job {
    my ($queuing, $extra, $command) = @_;
    my $submit = join(' ', @{$queuing}{qw(submit job_name job_hold cpu mem cluster_out)}, $extra, $command);
    `$submit`;
    die "Job submission failed (\$? = $?): $submit\n" if $? != 0;
    return;
}

# Scheduler settings shared by both submissions.
my %addParams = (scheduler => "$scheduler", runtime => "500", priority_project=> "$priority_project", priority_group=> "$priority_group", queues => "lau.q,lcg.q,nce.q", rerun => "0", iounits => "1");
my $additionalParams = Schedule::additionalParams(%addParams);

# Step 1: run the RNA-seq pipeline in alignment-only mode into $rna_output.
my $preprocess_job = "$pre\_$uID\_RNA_VARIANTS_PREPROCESSING";
my %stdParams = (scheduler => "$scheduler", job_name => $preprocess_job, cpu => "1", mem => "10", cluster_out => "$output/progress/$preprocess_job.log");
my $standardParams = Schedule::queuing(%stdParams);
_submit_job($standardParams, $additionalParams, "$rnaseq_pipeline/rnaseq_pipeline.pl -species $species -priority_project $priority_project -priority_group $priority_group -email $email -request $request -pre $pre -map $map -config $rnaseq_pipeline/rnaseq_pipeline_config.txt -strand $strand -star -o $rna_output -rsync $rsync -alignment_only");

# NOTE(review): jobSync presumably blocks until the named job has finished - confirm against the jobSync script.
`$Bin/jobSync $scheduler $preprocess_job`;

# Step 2: run the variants pipeline on the alignments written by step 1.
# The job is held on "${pre}_${uID}_RSYNC"; that job is not submitted in this
# file (presumably by rnaseq_pipeline.pl - TODO confirm).
my $calling_job = "$pre\_$uID\_RNA_VARIANTS_CALLING";
%stdParams = (scheduler => "$scheduler", job_name => $calling_job, job_hold => "$pre\_$uID\_RSYNC", cpu => "1", mem => "10", cluster_out => "$output/progress/$calling_job.log");
$standardParams = Schedule::queuing(%stdParams);
_submit_job($standardParams, $additionalParams, "$Bin/variants_pipeline.pl -species $species -priority_project $priority_project -priority_group $priority_group -tempdir $tempdir -email $email -request $request -pre $pre -map $map -config $Bin/variants_pipeline_config.txt -group $group -pair $pair -targets $targets -wes -rna $rna_output/gene/alignments/ -indelrealigner -o $output -rsync $rsync");