-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpgsc_calc.wdl
136 lines (117 loc) · 3.64 KB
/
pgsc_calc.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
version 1.0
# Workflow: calculate polygenic scores with the pgscatalog/pgsc_calc pipeline.
#
# Every entry of `vcf` is converted to a PLINK 2 fileset by its own
# prepare_genomes call (scattered). The collected filesets are then passed
# to a single pgsc_calc_nextflow call, whose match, score and log files are
# exposed as the workflow outputs.
workflow pgsc_calc {
    meta {
        author: "Stephanie Gogarten"
        email: "[email protected]"
    }

    input {
        # Variant files; each one is converted independently.
        Array[File] vcf
        # One chromosome label per entry of `vcf`; the downstream task stops
        # if the two arrays differ in length.
        Array[String] chromosome
        # Forwarded to the pipeline as --target_build.
        String target_build = "GRCh38"
        # Score identifiers, joined with commas into --pgs_id.
        Array[String] pgs_id
        # When true, "--run_ancestry <ref_panel>" is added to the pipeline call.
        Boolean run_ancestry
        # NOTE(review): a File defaulting to the empty string is presumably a
        # stand-in for "no reference panel"; confirm the execution engine
        # tolerates it when run_ancestry is false.
        File ref_panel = ""
        # Value of the samplesheet's `sampleset` column; also part of the
        # results path that the outputs are globbed from.
        String sampleset_name = "cohort"
        # Extra command-line arguments appended verbatim to the pipeline call.
        Array[String]? arguments
    }

    # Step 1: one VCF/BCF -> pgen/pvar/psam conversion per input file.
    scatter (vcf_file in vcf) {
        call prepare_genomes {
            input:
                vcf = vcf_file
        }
    }

    # Step 2: score all converted filesets in one pipeline run.
    call pgsc_calc_nextflow {
        input:
            # gathered scatter outputs (one element per input file)
            pgen = prepare_genomes.pgen,
            pvar = prepare_genomes.pvar,
            psam = prepare_genomes.psam,
            # samplesheet contents
            chromosome = chromosome,
            sampleset = sampleset_name,
            # pipeline options
            pgs_id = pgs_id,
            target_build = target_build,
            run_ancestry = run_ancestry,
            ref_panel = ref_panel,
            arguments = arguments
    }

    output {
        Array[File] match_files = pgsc_calc_nextflow.match_files
        Array[File] score_files = pgsc_calc_nextflow.score_files
        Array[File] log_files = pgsc_calc_nextflow.log_files
    }
}
# Task: convert a single VCF or BCF file into a PLINK 2 fileset.
#
# Inputs:
#   vcf    - variant file to convert; the plink2 import flag (--vcf / --bcf)
#            is chosen from the file name.
#   mem_gb - memory requested from the backend, in GB.
#   cpu    - CPUs requested from the backend; also given to plink2 as --threads.
# Outputs:
#   pgen, pvar, psam - the fileset written by --make-pgen, named after the
#                      input file with its .vcf/.bcf extension removed.
task prepare_genomes {
    input {
        File vcf
        Int mem_gb = 16
        Int cpu = 2
    }

    # 2.5x the input size plus 5 GB of headroom.
    Int disk_size = ceil(2.5*(size(vcf, "GB"))) + 5

    String filename = basename(vcf)

    # Output prefix: the file name with ".vcf"/".bcf" and everything after it
    # (e.g. ".gz") removed. "[.]" is used for the literal dot because POSIX
    # bracket classes such as [[:punct:]] are not interpreted as punctuation
    # by Java- or Python-flavoured regex engines, which left the extension
    # in place. Named `out_prefix` so it no longer shadows basename().
    String out_prefix = sub(filename, "[.][bv]cf.*$", "")

    # Treat the input as BCF only when the name ends in a real ".bcf"
    # extension (optionally compressed); an unescaped "." would also match
    # names that merely contain "<any char>bcf".
    String import_flag = if (sub(filename, "[.]bcf([.]b?gz)?$", "") != filename) then "--bcf" else "--vcf"

    command <<<
        plink2 ~{import_flag} "~{vcf}" \
            --allow-extra-chr \
            --chr 1-22, X, Y, XY \
            --threads ~{cpu} \
            --make-pgen --out "~{out_prefix}"
    >>>

    output {
        File pgen = "~{out_prefix}.pgen"
        File pvar = "~{out_prefix}.pvar"
        File psam = "~{out_prefix}.psam"
    }

    runtime {
        docker: "uwgac/pgsc_calc:0.1.0"
        disks: "local-disk ~{disk_size} SSD"
        memory: "~{mem_gb}G"
        cpu: "~{cpu}"
    }
}
# Task: build a samplesheet from the PLINK 2 filesets and run the
# pgscatalog/pgsc_calc Nextflow pipeline on it.
#
# Inputs:
#   pgen, pvar, psam - PLINK 2 fileset components, one element per chromosome
#                      file. Only `pgen` is referenced in the command; pvar
#                      and psam are presumably declared so they are localized
#                      next to their .pgen and found through path_prefix.
#                      NOTE(review): confirm the backend keeps each trio in
#                      the same directory.
#   chromosome       - chromosome label for each pgen; the command stops if
#                      its length differs from the number of pgen files.
#   target_build     - passed as --target_build.
#   pgs_id           - joined with commas and passed as --pgs_id.
#   run_ancestry     - when true, "--run_ancestry <ref_panel>" is added.
#   ref_panel        - reference panel file used with --run_ancestry.
#   sampleset        - value of the samplesheet `sampleset` column and the
#                      results sub-directory the outputs are globbed from.
#   arguments        - optional extra arguments appended to the nextflow call.
#   disk_gb, mem_gb, cpu - resources requested from the backend.
# Outputs:
#   samplesheet - the generated samplesheet.csv
#   match_files - everything under results/<sampleset>/match/
#   score_files - everything under results/<sampleset>/score/
#   log_files   - everything under results/pipeline_info/
task pgsc_calc_nextflow {
    input {
        Array[File] pgen
        Array[File] pvar
        Array[File] psam
        Array[String] chromosome
        String target_build
        Array[String] pgs_id
        Boolean run_ancestry
        File ref_panel
        String sampleset
        Array[String]? arguments
        Int disk_gb = 128
        Int mem_gb = 64
        Int cpu = 16
    }

    # Empty unless ancestry analysis was requested.
    String ancestry_arg = if (run_ancestry) then "--run_ancestry " + ref_panel else ""

    command <<<
        set -e -o pipefail

        # Write samplesheet.csv with one row per pgen file. path_prefix is the
        # pgen path without its extension; '[.]' matches a literal dot so only
        # a real ".pgen" suffix is removed.
        Rscript -e "\
            files <- readLines('~{write_lines(pgen)}'); \
            chrs <- readLines('~{write_lines(chromosome)}'); \
            stopifnot(length(files) == length(chrs)); \
            file_prefix <- sub('[.]pgen$', '', files); \
            sampleset <- tibble::tibble(sampleset = '~{sampleset}', path_prefix=file_prefix, chrom=chrs, format='pfile'); \
            readr::write_csv(sampleset, 'samplesheet.csv'); \
            "

        nextflow run pgscatalog/pgsc_calc -r v2.0.0-alpha.5 -profile conda \
            --input samplesheet.csv \
            --target_build ~{target_build} \
            --pgs_id ~{sep="," pgs_id} \
            ~{ancestry_arg} \
            ~{sep=" " arguments}
    >>>

    output {
        File samplesheet = "samplesheet.csv"
        Array[File] match_files = glob("results/~{sampleset}/match/*")
        Array[File] score_files = glob("results/~{sampleset}/score/*")
        Array[File] log_files = glob("results/pipeline_info/*")
    }

    runtime {
        #docker: "uwgac/pgsc_calc:0.1.0"
        docker: "us-docker.pkg.dev/primed-cc/pgsc-calc/pgsc_calc:0.1.0"
        disks: "local-disk ~{disk_gb} SSD"
        memory: "~{mem_gb}G"
        cpu: "~{cpu}"
    }
}