-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMakefile
executable file
·659 lines (535 loc) · 22.3 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
# Arborist Makefile
# James A. Overton <[email protected]>
#
# Arborist builds "trees" that drive various IEDB finders.
# It merges IEDB data with community ontologies and other databases.
# The build follows these steps:
#
# 1. Fetch IEDB data: requires MySQL connection
# 2. Build NCBI Taxonomy
# 3. Build Organism Tree and determine active species
# 4. Select Proteomes for each active species
# 5. Build Protein Tree
# 6. TODO Build Molecule Tree (right now fetches old peptidic tree)
# 7. TODO Build Assay Tree
# 8. TODO Build Disease Tree (right now fetches old tree)
# 9. Build Leidos files
#
# TODO: test suite (in progress)
# TODO: geolocation tree, MHC tree
# TODO: merged SoT tree
# TODO: symlink?
### Configuration
#
# These are standard options to make Make sane:
# <http://clarkgrubb.com/makefile-style-guide#toc2>
# Warn when a variable is referenced without being defined (catches typos).
MAKEFLAGS += --warn-undefined-variables
# Run every recipe line with bash in strict mode: stop on the first failing
# command, on unset shell variables, and on a failure anywhere in a pipeline.
SHELL := bash
.SHELLFLAGS := -eu -o pipefail -c
# Running `make` without a goal prints the task list.
.DEFAULT_GOAL := help
# Delete a target whose recipe failed, so it is never mistaken for up to date.
.DELETE_ON_ERROR:
.PRECIOUS:
# Clear the suffix list so no legacy suffix rules apply.
.SUFFIXES:
# Tools installed into bin/ take precedence over the rest of the PATH.
export PATH := $(shell pwd)/bin:$(PATH)
# Python interpreter used to run the project's scripts.
# A value from the environment or the command line wins; the fallback is
# the `python3` that this Makefile requires to be installed anyway.
VENV_PYTHON ?= python3
### Main Tasks
#
# The main tasks for running Arborist.
# Print the list of available tasks, one line per task.
.PHONY: help
help:
	@printf '%s\n' \
	  'Arborist: build trees for the IEDB' \
	  '' \
	  'TASKS' \
	  ' deps install dependencies' \
	  ' iedb load IEDB data' \
	  ' ncbitaxon build the NCBI Taxonomy' \
	  ' organism build the organism and subspecies trees' \
	  ' proteome select proteomes' \
	  ' protein build the protein tree' \
	  ' molecule build the molecule tree' \
	  ' leidos copy files for Leidos' \
	  ' all build all trees' \
	  ' serve run the web interface on localhost:3000' \
	  ' clean remove all build files' \
	  ' clobber remove all generated files' \
	  ' weekly builds all trees without proteome step' \
	  ' weekly_clean removes directories and files made by weekly build' \
	  ' help print this message'
# Aggregate goals.
# `deps` starts out empty; the "Install Dependencies" section below adds
# one prerequisite for every tool that is not already on the PATH.
.PHONY: deps all weekly
deps:

# Every build step, including proteome selection.
all: deps iedb ncbitaxon organism proteome protein molecule disease leidos

# The same steps as `all`, minus `proteome`.
weekly: deps iedb ncbitaxon organism protein molecule disease leidos
# Remove the directories and files created by the `weekly` build.
# cache/ is made writable first so that it can be deleted; that step is
# skipped when cache/ does not exist, so the task can be run repeatedly.
.PHONY: weekly_clean
weekly_clean:
	if [ -d cache/ ]; then chmod -R +w cache/; fi
	rm -rf build/iedb/ build/arborist/ cache/ current/
	rm -rf build/disease*
# Start the web interface script (listed in `help` as localhost:3000).
.PHONY: serve
serve: src/util/serve.py
	$(VENV_PYTHON) src/util/serve.py
# Remove the build directory while keeping build/species/.
# The species directory is set aside at the top level, build/ is deleted
# and recreated, and the species directory is then moved back.
# Both moves are skipped when there is nothing to move.
.PHONY: clean
clean:
	if [ -d build/species ]; then mv build/species species; fi
	rm -rf build
	mkdir -p build
	if [ -d species ]; then mv species build/species; fi
# Remove everything that was generated or downloaded.
# cache/ is made writable first so that it can be deleted; that step is
# skipped when cache/ does not exist, so the task can be run repeatedly.
.PHONY: clobber
clobber:
	if [ -d cache/ ]; then chmod -R +w cache/; fi
	rm -rf bin/ build/ cache/ current/
# Create any of the top-level working directories on demand.
bin/ build/ cache/ current/:
	mkdir -p $@

# Delegate to the `test` goal of the Makefile inside test/.
.PHONY: test
test:
	$(MAKE) --directory=test test
### Install Dependencies
#
# For each software dependency we use Make's `ifeq` conditional
# and `command -v <name>` to check if the dependency is already in
# the PATH (including `bin/`). If not, then we define a Makefile
# task to install it to `bin/`, and add that dependency to `deps`.
# Tools that must already be installed. Parsing stops with an
# installation hint when one of them is not found on the PATH.

# SQLite
ifeq "$(shell command -v sqlite3)" ""
$(error 'Please install SQLite 3')
endif

# MySQL or MariaDB client
ifeq "$(shell command -v mysql)" ""
$(error "Please install 'mariadb' from MariaDB")
endif

# Python
ifeq "$(shell command -v python3)" ""
$(error 'Please install Python 3, so we can run various scripts')
endif

# Java
ifeq "$(shell command -v java)" ""
$(error 'Please install Java, so we can run ROBOT and LDTab')
endif
# ROBOT: when no `robot` is on the PATH, download the pinned jar and the
# launcher script into bin/ and add the launcher to `deps`.
ifeq "$(shell command -v robot)" ""
bin/robot.jar: | bin/
	curl --location --output $@ 'https://github.com/ontodev/robot/releases/download/v1.9.4/robot.jar'

bin/robot: bin/robot.jar
	curl --location --output $@ 'https://raw.githubusercontent.com/ontodev/robot/master/bin/robot'
	chmod +x $@

deps: bin/robot
endif
# LDTab: when no `ldtab` is on the PATH, download the pinned jar and write
# a two-line shell wrapper next to it that runs the jar with Java.
ifeq "$(shell command -v ldtab)" ""
bin/ldtab.jar: | bin/
	curl --location --output $@ 'https://github.com/ontodev/ldtab.clj/releases/download/v2023-12-21/ldtab.jar'

bin/ldtab: bin/ldtab.jar
	printf '%s\n' '#!/bin/sh' 'java -jar "$$(dirname $$0)/ldtab.jar" "$$@"' > $@
	chmod +x $@

deps: bin/ldtab
endif
# Nanobot: when no `nanobot` is on the PATH, download the pinned
# x86_64 Linux (musl) binary into bin/.
# NOTE(review): the download skips TLS certificate verification
# (`--insecure`) -- confirm that this is still required.
ifeq "$(shell command -v nanobot)" ""
bin/nanobot: | bin/
	curl --location --insecure --output $@ 'https://github.com/ontodev/nanobot.rs/releases/download/v2023-10-26/nanobot-x86_64-unknown-linux-musl'
	chmod +x $@

deps: bin/nanobot
endif
# valve-export: when the script is not on the PATH, download export.py
# from the valve.rs repository into bin/ and make it executable.
ifeq "$(shell command -v valve-export)" ""
bin/valve-export: | bin/
	curl --location --output $@ 'https://github.com/ontodev/valve.rs/raw/main/scripts/export.py'
	chmod +x $@

deps: bin/valve-export
endif
# QSV: when no `qsv` is on the PATH, download the pinned release archive
# into build/, extract only the `qsv` binary, and move it into bin/.
ifeq "$(shell command -v qsv)" ""
QSV_VERSION := 0.118.0

bin/qsv: | bin/ build/
	curl --location --insecure --output build/qsv.zip 'https://github.com/jqnatividad/qsv/releases/download/$(QSV_VERSION)/qsv-$(QSV_VERSION)-x86_64-unknown-linux-musl.zip'
	unzip build/qsv.zip qsv -d build
	mv build/qsv $@

deps: bin/qsv
endif
# BLAST: when no `blastp` is on the PATH, download the pinned NCBI BLAST+
# archive and take `blastp` and `makeblastdb` out of it.
# The rule below is applied once per target, so each run extracts and
# moves only the one binary named by $@.
ifeq "$(shell command -v blastp)" ""
BLAST_VERSION := 2.16.0

build/ncbi-blast.tar.gz: | build/
	curl --location --output $@ 'https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/$(BLAST_VERSION)/ncbi-blast-$(BLAST_VERSION)+-x64-linux.tar.gz'

bin/blastp bin/makeblastdb: build/ncbi-blast.tar.gz | bin/
	tar -zxvf $< -C build/ ncbi-blast-$(BLAST_VERSION)+/$@
	mv build/ncbi-blast-$(BLAST_VERSION)+/$@ $@

deps: bin/blastp bin/makeblastdb
endif
# HMMER: when no `hmmscan` is on the PATH, download the pinned source
# release, build it, and copy `hmmscan` into bin/.
ifeq ($(shell command -v hmmscan),)
HMMER_VERSION := 3.4

build/hmmer-$(HMMER_VERSION).tar.gz: | build/
	curl -L -o $@ 'http://eddylab.org/software/hmmer/hmmer-$(HMMER_VERSION).tar.gz'

build/hmmer-$(HMMER_VERSION): build/hmmer-$(HMMER_VERSION).tar.gz
	cd build/ && tar xvf $(notdir $<)

# HMMER is installed into its own source directory (the --prefix), which
# is why the finished binary is found at $</bin/hmmscan.
# The nested build goes through $(MAKE) on its own line so that it shares
# this make's job server, and so that `make -n` does not run ./configure.
bin/hmmscan: build/hmmer-$(HMMER_VERSION) | bin/
	cd $< && ./configure --prefix $(CURDIR)/$<
	$(MAKE) -C $< install
	cp $</$@ $@

deps: bin/hmmscan
endif
# MMseqs2: when no `mmseqs` is on the PATH, download the pinned SSE4.1
# Linux build, unpack it, and copy the binary into bin/.
# The unpacked `mmseqs` directory is renamed to `mmseqs-linux-sse41`
# and deleted again once the binary has been copied.
ifeq "$(shell command -v mmseqs)" ""
MMSEQS_VERSION := 15-6f452

build/mmseqs-linux-sse41.tar.gz: | build/
	curl --location --output $@ 'https://github.com/soedinglab/MMseqs2/releases/download/$(MMSEQS_VERSION)/mmseqs-linux-sse41.tar.gz'

build/mmseqs: build/mmseqs-linux-sse41.tar.gz
	tar xvf $< -C build/
	mv build/mmseqs build/mmseqs-linux-sse41

bin/mmseqs: build/mmseqs | bin/
	cp build/mmseqs-linux-sse41/bin/mmseqs $@
	rm -rf build/mmseqs-linux-sse41

deps: bin/mmseqs
endif
### 1. Fetch IEDB Data
#
# Copy some tables from the IEDB, validate some of them,
# and load them into a SQLite database.
# Working directory for the IEDB Nanobot instance.
build/iedb/:
	mkdir -p $@

# Nanobot reads its configuration from the directory it runs in,
# so copy the configuration next to the database.
build/iedb/nanobot.toml: src/iedb/nanobot.toml | build/iedb/
	cp src/iedb/nanobot.toml $@

# Any file missing from current/iedb/ is obtained by refreshing the cache.
# The database name is taken from the IEDB_MYSQL_DATABASE environment
# variable, so MySQL/MariaDB connection details are required.
current/iedb/%:
	src/iedb/update-cache "$${IEDB_MYSQL_DATABASE}"

# Decompress one cached IEDB table into the build directory.
build/iedb/%.tsv: current/iedb/%.tsv.gz | build/iedb/
	zcat $< > $@
# Load IEDB tables into SQLite using Nanobot.
# For some tables we just create a header row, with no data rows,
# then remove those files after Nanobot has initialized.
# TODO: This task will be much simpler with planned VALVE features.
#
# Steps, in order:
# 1. delete any existing database file;
# 2. write header-only source.tsv, object.tsv, and epitope.tsv from the
#    first line of the corresponding cached .tsv.gz files;
# 3. write header-only peptide.tsv and peptide_source.tsv from the column
#    names listed for those tables in src/iedb/column.tsv;
# 4. run `nanobot init` inside build/iedb/;
# 5. delete the five header-only files again.
build/iedb/nanobot.db: build/iedb/nanobot.toml
build/iedb/nanobot.db: build/iedb/ncbi_include.tsv
build/iedb/nanobot.db: build/iedb/iedb_taxa.tsv
build/iedb/nanobot.db: current/iedb/source.tsv.gz
build/iedb/nanobot.db: current/iedb/object.tsv.gz
build/iedb/nanobot.db: current/iedb/epitope.tsv.gz
rm -f $@
# `head -n1` stops reading after one line; presumably `|| exit 0` keeps the
# resulting pipeline failure (under `pipefail`) from failing the recipe
# -- TODO confirm.
zcat current/iedb/source.tsv.gz | head -n1 > build/iedb/source.tsv || exit 0
zcat current/iedb/object.tsv.gz | head -n1 > build/iedb/object.tsv || exit 0
zcat current/iedb/epitope.tsv.gz | head -n1 > build/iedb/epitope.tsv || exit 0
# create peptide.tsv with just headers
# NOTE(review): `tr` turns every newline into a tab, so the joined header
# ends with a tab, while the `sed` expression as written removes a
# trailing space -- confirm which character is intended.
qsv search --select table 'peptide\b' src/iedb/column.tsv \
| qsv select column \
| qsv behead \
| tr '\n' '\t' \
| sed 's/ $$//' \
> build/iedb/peptide.tsv
# create peptide_source.tsv with just headers
qsv search --select table 'peptide_source\b' src/iedb/column.tsv \
| qsv select column \
| qsv behead \
| tr '\n' '\t' \
| sed 's/ $$//' \
> build/iedb/peptide_source.tsv
cd build/iedb/ && nanobot init
# Remove the header-only placeholder files written above.
rm -f build/iedb/source.tsv
rm -f build/iedb/object.tsv
rm -f build/iedb/epitope.tsv
rm -f build/iedb/peptide.tsv
rm -f build/iedb/peptide_source.tsv
# Replace the rows of one SQLite table with the contents of its TSV file,
# bypassing VALVE validation. The .built file is only a timestamp marker.
build/iedb/%.built: build/iedb/%.tsv | build/iedb/nanobot.db
	sqlite3 build/iedb/nanobot.db "DELETE FROM '$*'"
	src/util/tsv2sqlite build/iedb/nanobot.db build/iedb/$*.tsv
	touch $@

# Run a SQL query against the loaded tables and save the result as TSV:
# peptides come from the epitope and object tables ...
build/iedb/peptide.tsv: src/iedb/peptide.sql build/iedb/epitope.built build/iedb/object.built | build/iedb/nanobot.db
	src/util/sqlite2tsv build/iedb/nanobot.db src/iedb/peptide.sql $@

# ... and peptide sources from the source table.
build/iedb/peptide_source.tsv: src/iedb/peptide_source.sql build/iedb/source.built | build/iedb/nanobot.db
	src/util/sqlite2tsv build/iedb/nanobot.db src/iedb/peptide_source.sql $@
# Keep columns 1, 2, 13 and 15 of the cached structure table, then pass
# the result together with the peptide table to map_structure_ids.py.
# The cached table is listed as a prerequisite so that a missing file is
# fetched first and a refreshed cache triggers a rebuild.
build/iedb/structure.tsv: build/iedb/peptide.tsv current/iedb/structure.tsv.gz
	zcat $(word 2,$^) | cut -f1,2,13,15 > $@
	src/util/map_structure_ids.py $@ $<

# Step 1 of the build: load the IEDB data.
.PHONY: iedb
iedb: build/iedb/peptide.built build/iedb/peptide_source.built build/iedb/structure.tsv
### 2. Build NCBI Taxonomy
#
# Set up a Nanobot instance.
# Fetch the NCBI Taxonomy `taxdmp.zip` file
# and use it to populate a table.
# Working directory for the Arborist Nanobot instance.
build/arborist/:
	mkdir -p $@

# Nanobot configuration, copied next to the database.
build/arborist/nanobot.toml: src/arborist/nanobot.toml | build/arborist/
	cp src/arborist/nanobot.toml $@

# (Re)create the Nanobot database.
# All *.built markers in build/arborist/ are deleted along with the old
# database, so every table is reloaded into the new one.
build/arborist/nanobot.db: build/arborist/nanobot.toml src/arborist/*.tsv
	rm -f $@ build/arborist/*.built
	cd $(@D) && nanobot init
# Cache directory for NCBI Taxonomy archives.
# `-p` also creates cache/ itself when it does not exist yet.
cache/ncbitaxon/:
	mkdir -p $@

# Archives are requested by the first day of the current month.
TAXDMP_VERSION := $(shell date +"%Y-%m-01")

# Fetch the taxdmp.zip for this month.
cache/ncbitaxon/taxdmp_$(TAXDMP_VERSION).zip: | cache/ncbitaxon/ current/
	curl -L -o $@ https://ftp.ncbi.nih.gov/pub/taxonomy/taxdump_archive/taxdmp_$(TAXDMP_VERSION).zip

# Point current/taxdmp.zip at this month's archive with a relative symlink.
current/taxdmp.zip: cache/ncbitaxon/taxdmp_$(TAXDMP_VERSION).zip | current/
	rm -f $@
	cd current/ && ln -s ../$< taxdmp.zip
# Rebuild the `ncbitaxon` table from taxdmp.zip: drop the old table, run
# the conversion script, then index the subject, predicate and object
# columns and refresh the statistics.
build/arborist/ncbitaxon.built: src/organism/ncbitaxon2ldtab.py current/taxdmp.zip | build/arborist/nanobot.db
	sqlite3 $| "DROP TABLE IF EXISTS ncbitaxon"
	$(VENV_PYTHON) $^ $|
	for col in subject predicate object; do \
	  sqlite3 $| "CREATE INDEX idx_ncbitaxon_$${col} ON ncbitaxon($${col})"; \
	done
	sqlite3 $| "ANALYZE ncbitaxon"
	touch $@

# Step 2 of the build: the NCBI Taxonomy.
.PHONY: ncbitaxon
ncbitaxon: build/arborist/ncbitaxon.built
### 3. Build Organism Tree
#
# Build the organism tree from organism_core, IEDB taxa, and active NCBI taxa.
# Determine all the active species.
# Build the subspecies tree by adding all descendants of active species
# from the full NCBI Taxonomy.
# Save the result of the peptide-count query against the IEDB database.
# TODO: Check that this is a reasonable way to count.
build/arborist/peptide-count.tsv: src/organism/peptide-count.sql build/iedb/peptide.built | build/iedb/nanobot.db
	src/util/sqlite2tsv build/iedb/nanobot.db src/organism/peptide-count.sql $@

# HTML rendering of the organism_core table.
build/arborist/organism_core.html: src/organism/render_organism_core.py src/organism/organism_core.tsv | build/arborist/
	$(VENV_PYTHON) $< src/organism/organism_core.tsv $@
# Build the organism tree table. The script is given the database, every
# .tsv prerequisite in the order listed, and the output path; the result
# is then sorted in place.
build/arborist/organism-tree.tsv: src/organism/assign_species.py build/arborist/ncbitaxon.built src/organism/organism_core.tsv build/iedb/ncbi_include.tsv build/iedb/iedb_taxa.tsv build/arborist/peptide-count.tsv | build/arborist/nanobot.db
	$(VENV_PYTHON) $< $| $(filter %.tsv,$^) $@
	qsv sort $@ --output $@

# Load the organism tree into a fresh `organism_tree` table.
build/arborist/organism-tree.built: src/organism/build_organism_tree.py build/arborist/organism-tree.tsv | build/arborist/nanobot.db
	sqlite3 $| "DROP TABLE IF EXISTS organism_tree"
	$(VENV_PYTHON) $< $| build/arborist/organism-tree.tsv
	touch $@

# Build a fresh `subspecies_tree` table once the organism tree is loaded.
build/arborist/subspecies-tree.built: src/organism/build_subspecies_tree.py build/arborist/organism-tree.built | build/arborist/nanobot.db
	sqlite3 $| "DROP TABLE IF EXISTS subspecies_tree"
	$(VENV_PYTHON) $< $|
	touch $@
# Export sorted LDTab TSV files.
# The table is exported to the target path, moved aside, and sorted back
# into place. The temporary name is derived from $@ and keeps the .tsv
# extension so that qsv reads it as tab-separated.
build/arborist/%-tree-ldtab.tsv: build/arborist/%-tree.built | build/arborist/nanobot.db
	rm -f $@
	ldtab export $| --table $*_tree $@
	mv $@ $@.unsorted.tsv
	qsv sort $@.unsorted.tsv --output $@
	rm -f $@.unsorted.tsv
# Export a `<name>_tree` table as Turtle.
build/arborist/%-tree.ttl: build/arborist/%-tree.built | build/arborist/nanobot.db
	rm -f $@
	ldtab export $| --table $*_tree $@

# Merge the exported tree with the shared predicate declarations and set
# the ontology IRI from the output file name.
build/arborist/%-tree.owl: build/arborist/%-tree.ttl src/organism/predicates.ttl
	robot merge --input $< --input src/organism/predicates.ttl \
	  annotate \
	    --ontology-iri https://ontology.iedb.org/ontology/$(@F) \
	    --output $@
# Write the active species table. The script is given the database,
# the .tsv prerequisites, and the output path.
build/arborist/active-species.tsv: src/organism/get_active_species.py build/arborist/organism-tree.built build/arborist/peptide-count.tsv | build/arborist/nanobot.db
	$(VENV_PYTHON) $< $| $(filter %.tsv, $^) $@

# Step 3 of the build: organism and subspecies trees in all export
# formats, plus the active species and proteome tables. Finishes by
# running the `reload` goal; $(MAKE) is used so that the nested make
# inherits the flags and job server of this one.
.PHONY: organism
organism: build/arborist/organism_core.html
organism: build/arborist/organism-tree-ldtab.tsv build/arborist/subspecies-tree-ldtab.tsv
organism: build/arborist/organism-tree.owl build/arborist/subspecies-tree.owl
organism: build/arborist/active-species.tsv build/arborist/proteome.tsv
	$(MAKE) reload
# Recreate the organisms release directory from scratch; it holds the
# subspecies tree with permissions 644.
build/organisms/latest/: build/arborist/subspecies-tree.owl
	rm -rf $@
	mkdir -p $@
	cp $< $@
	chmod 644 $@*
### 4. Select Proteomes for each active species
#
# Create a directory for any active species.
# Use the active_taxa column to get all descendant taxa.
# Copy peptides and sources into the directory.
# Select a proteome for that species,
# fetching FASTA and XML annotations.
# TODO: Use previously selected proteomes or force refresh.
# Left-join the active species with the stored proteome selections on
# 'Species ID', then keep columns 1-6 and 12 onwards of the joined table.
build/arborist/proteome.tsv: build/arborist/active-species.tsv src/protein/data/proteomes.tsv
	qsv join --left 'Species ID' $< 'Species ID' src/protein/data/proteomes.tsv \
	| qsv select 1-6,12- --output $@

# One working directory per species.
build/species/%/:
	mkdir -p $@
# For the species whose ID is the stem, take column 4 of the active
# species table and turn its ", "-separated list into a pattern of
# alternates for `qsv search`, e.g. 1053|11057|11059|11060|...
build/species/%/taxa.txt: build/arborist/active-species.tsv | build/species/%/
	awk -F'\t' '$$2==$* {print $$4}' $< \
	| sed 's/, /|/g' > $@

# Rows of the peptide table whose 'Organism ID' matches the pattern.
build/species/%/epitopes.tsv: build/iedb/peptide.tsv build/species/%/taxa.txt
	qsv search --select 'Organism ID' $$(cat build/species/$*/taxa.txt) $< --output $@

# Rows of the peptide source table whose 'Organism ID' matches the pattern.
build/species/%/sources.tsv: build/iedb/peptide_source.tsv build/species/%/taxa.txt
	qsv search --select 'Organism ID' $$(cat build/species/$*/taxa.txt) $< --output $@

.PRECIOUS: build/species/%/epitopes.tsv build/species/%/sources.tsv
# Run proteome selection over build/.
# NOTE(review): this recipe does not touch $@ itself; presumably the
# script writes the marker file -- confirm.
build/arborist/proteomes.built: build/arborist/proteome.tsv
	$(VENV_PYTHON) src/protein/protein_tree/select_proteome.py -b build/

# Proteome table without its sixth column (the epitope counts).
build/arborist/proteome_after.tsv: build/arborist/proteome.tsv
	qsv select 1-5,7- build/arborist/proteome.tsv --output $@

# Diff of the stored proteome selections against the newly built table.
build/arborist/proteome.html: src/protein/data/proteomes.tsv build/arborist/proteome_after.tsv
	daff --output $@ src/protein/data/proteomes.tsv build/arborist/proteome_after.tsv

# TODO: delete all proteome.db files when reselecting so that they are remade
# Step 4 of the build: proteome selection.
.PHONY: proteome
proteome: build/arborist/proteomes.built
### 5. Build Protein Tree
# Download the allergen table. The file is written into build/arborist/,
# so that directory (not just build/) must exist first.
build/arborist/allergens.csv: | build/arborist/
	curl -L --retry 10 -o $@ 'http://www.allergen.org/csv.php?table=joint'

# Convert the allergen table from CSV to TSV.
build/arborist/allergens.tsv: src/util/csv2tsv.py build/arborist/allergens.csv
	$(VENV_PYTHON) $^ $@

# Copy the curated allergen data into the build directory.
build/arborist/allergens.json: src/protein/data/allergens.json
	cp $< $@

# Copy the manually curated parent and synonym tables.
# The prerequisites chain these rules after the allergen files.
build/arborist/manual-parents.tsv: build/arborist/allergens.tsv build/arborist/allergens.json
	cp src/protein/data/manual-parents.tsv $@

build/arborist/manual-synonyms.tsv: build/arborist/manual-parents.tsv
	cp src/protein/data/manual-synonyms.tsv $@

# Run the peptide assignment script once the manual tables are in place.
build/arborist/all-peptide-assignments.tsv: build/arborist/manual-parents.tsv build/arborist/manual-synonyms.tsv
	$(VENV_PYTHON) src/protein/protein_tree/assign.py -n 8

# Timestamp marker recording that assignment has finished.
build/arborist/protein-tree.assigned: build/arborist/all-peptide-assignments.tsv
	touch $@
# Download the pinned MRO release.
build/arborist/mro.owl: | build/arborist/
	curl -L -o $@ 'http://purl.obolibrary.org/obo/mro/2023-12-21/mro.owl'

# Load MRO into a fresh `mro` LDTab table, then index the subject,
# predicate and object columns and refresh the statistics.
build/arborist/mro.built: build/arborist/mro.owl | build/arborist/nanobot.db
	sqlite3 $| 'DROP TABLE IF EXISTS mro'
	ldtab init $| --table mro
	ldtab import $| $< --table mro
	for col in subject predicate object; do \
	  sqlite3 $| "CREATE INDEX idx_mro_$${col} ON mro($${col})"; \
	done
	sqlite3 $| 'ANALYZE mro'
	touch $@

.PHONY: mro
mro: build/arborist/mro.built
# Build the protein tree tables. Both tables are dropped, the build
# script is run, and then each table gets indexes on its subject,
# predicate and object columns followed by ANALYZE (old first, then new).
build/arborist/protein-tree.built: build/arborist/protein-tree.assigned build/arborist/organism-tree.built
	sqlite3 build/arborist/nanobot.db 'DROP TABLE IF EXISTS protein_tree_old'
	sqlite3 build/arborist/nanobot.db 'DROP TABLE IF EXISTS protein_tree_new'
	$(VENV_PYTHON) src/protein/protein_tree/build.py
	for tbl in protein_tree_old protein_tree_new; do \
	  for col in subject predicate object; do \
	    sqlite3 build/arborist/nanobot.db "CREATE INDEX idx_$${tbl}_$${col} ON $${tbl}($${col})"; \
	  done; \
	  sqlite3 build/arborist/nanobot.db "ANALYZE $${tbl}"; \
	done
	touch $@
# Export the `protein_tree_old` table as Turtle.
build/arborist/protein-tree.ttl: build/arborist/protein-tree.built | build/arborist/nanobot.db
	rm -f $@
	ldtab export $| $@ --table protein_tree_old

# Convert the Turtle export to OWL.
build/arborist/protein-tree.owl: build/arborist/protein-tree.ttl
	robot convert --input $< --output $@

# Run the epitope mapping script after the protein tree has been exported.
build/arborist/epitope-mappings.tsv: build/arborist/protein-tree.owl
	$(VENV_PYTHON) src/protein/protein_tree/immunomebrowser.py -n 10

# Step 5 of the build: the protein tree.
.PHONY: protein
protein: build/arborist/epitope-mappings.tsv
### 6. TODO Build Molecule Tree
# Assemble the molecule tree: remove BFO:0000023 and its descendants from
# the stored non-peptide tree, merge in the new protein tree, and stamp
# the result with the ontology IRI and a version IRI based on today's date.
build/arborist/molecule-tree.owl: nonpeptide-tree-20240305.owl build/arborist/protein-tree.owl
	robot remove --input $< --term BFO:0000023 --select "self descendants" \
	  merge --input build/arborist/protein-tree.owl \
	  annotate \
	    --ontology-iri https://ontology.iedb.org/ontology/molecule-tree.owl \
	    --version-iri https://ontology.iedb.org/ontology/$(shell date +%Y-%m-%d)/molecule-tree.owl \
	    --output $@
# Load the molecule tree into a fresh `molecule_tree` LDTab table, then
# index the subject, predicate and object columns and refresh statistics.
build/arborist/molecule-tree.built: build/arborist/molecule-tree.owl
	sqlite3 build/arborist/nanobot.db 'DROP TABLE IF EXISTS molecule_tree'
	ldtab init build/arborist/nanobot.db --table molecule_tree
	ldtab import build/arborist/nanobot.db $< --table molecule_tree
	for col in subject predicate object; do \
	  sqlite3 build/arborist/nanobot.db "CREATE INDEX idx_molecule_tree_$${col} ON molecule_tree($${col})"; \
	done
	sqlite3 build/arborist/nanobot.db 'ANALYZE molecule_tree'
	touch $@

# Step 6 of the build: the molecule tree.
.PHONY: molecule
molecule: build/arborist/molecule-tree.built
### 7. TODO Build Assay Tree
### 8. Build Disease Tree
# Copy disease-tree.owl from production.
# TODO: Build proper disease tree
# Download the existing disease tree.
# The request skips TLS certificate verification (`--insecure`).
build/disease-tree.owl: | build/
	curl --location --insecure --output $@ 'https://10.0.7.92/proteins/latest/disease-tree.owl'

# Load the disease tree into a fresh `disease_tree` LDTab table, index
# and analyze it, then export the table to the target file.
build/disease-tree.tsv: build/disease-tree.owl | build/arborist/nanobot.db
	sqlite3 $| 'DROP TABLE IF EXISTS disease_tree'
	ldtab init $| --table disease_tree
	ldtab import $| $< --table disease_tree
	for col in subject predicate object; do \
	  sqlite3 $| "CREATE INDEX idx_disease_tree_$${col} ON disease_tree($${col})"; \
	done
	sqlite3 $| 'ANALYZE disease_tree'
	ldtab export $| $@ --table disease_tree

# Step 8 of the build: the disease tree.
.PHONY: disease
disease: build/disease-tree.tsv
# TODO: geolocation tree, MHC tree
# TODO: merged SoT tree
# TODO: test data, symlink?
### 9. Build Leidos files
# Rotate the release directories: when build/proteins/latest exists it
# replaces build/proteins/previous/; otherwise an empty previous/ is made.
build/proteins/previous/: build/arborist/epitope-mappings.tsv
if [ -d build/proteins/latest ]; then \
rm -rf build/proteins/previous/; \
mv build/proteins/latest/ build/proteins/previous/; \
else \
mkdir -p $@; \
fi
# Assemble a fresh latest/ directory from the current build outputs and
# set permissions 644 on the copied files.
# NOTE(review): apart from epitope-mappings.tsv (reached through
# previous/), the copied files are not prerequisites of this rule;
# presumably they are expected to exist from earlier goals -- confirm.
build/proteins/latest/: build/proteins/previous/
rm -rf $@
mkdir -p $@
cp build/disease-tree.owl $@
cp build/arborist/molecule-tree.owl $@
cp build/arborist/parent-proteins.tsv $@
cp build/arborist/source-parents.tsv $@
cp build/arborist/epitope-mappings.tsv $@
chmod 644 $@*
# Generate the new-mappings file from the latest and previous epitope
# mappings. When there is no previous mapping file, print a notice and
# create an empty file instead.
build/proteins/latest/epitope-mappings_new.tsv: build/proteins/latest/
	if [ ! -f build/proteins/previous/epitope-mappings.tsv ]; then \
	  echo "No previous build files."; \
	  touch $@; \
	else \
	  $(VENV_PYTHON) src/util/generate_new_mappings.py build/proteins/latest/epitope-mappings.tsv build/proteins/previous/epitope-mappings.tsv $@; \
	fi

# Step 9 of the build: assemble the Leidos files, then run their tests.
.PHONY: leidos
leidos: build/organisms/latest/ build/proteins/previous/ build/proteins/latest/epitope-mappings_new.tsv
	$(VENV_PYTHON) -m pytest test/test_leidos.py
### Nanobot Actions
#
# Editing operations for the Nanobot Arborist database.
# Export tables from the Arborist database back to TSV files:
# the configuration tables to src/arborist/, organism_core to
# src/organism/, and every table whose row in src/arborist/table.tsv
# contains "build" to build/arborist/. organism_core.tsv is sorted last.
.PHONY: save
save: | build/arborist/nanobot.db
	valve-export data build/arborist/nanobot.db src/arborist/ table column datatype
	valve-export data build/arborist/nanobot.db src/organism/ organism_core
	valve-export data build/arborist/nanobot.db build/arborist/ $$(grep build src/arborist/table.tsv | cut -f1 | tr '\n' ' ')
	$(VENV_PYTHON) src/organism/sort_organism_core.py src/organism/organism_core.tsv
# Tables that `reload` drops before Nanobot is initialized again.
DROPTABLES := proteomes active_species organism_core organism_tree_tsv prefix column datatype table message history

# The four DROP statements for one table name: its two views, its
# conflict table, and the table itself. Use with $(call ...).
drop_table_sql = "DROP VIEW IF EXISTS '$(1)_text_view'" "DROP VIEW IF EXISTS '$(1)_view'" "DROP TABLE IF EXISTS '$(1)_conflict'" "DROP TABLE IF EXISTS '$(1)'"

# Drop the tables above in a single sqlite3 call, run `nanobot init` in
# the database directory, then run the organism_core check. The check's
# exit status is ignored (leading `-`), so it cannot fail the build.
.PHONY: reload
reload: src/organism/check_organism_core.py | build/arborist/nanobot.db
	sqlite3 $| $(foreach DT,$(DROPTABLES),$(call drop_table_sql,$(DT)))
	cd $(dir $|) && nanobot init
	-$(VENV_PYTHON) $< $|
### Comparisons
#
# These tasks build other trees for comparison.
# Load an existing organism-tree.owl into a fresh `organism_tree_old`
# LDTab table for comparison. The OWL file is the rule's only normal
# prerequisite, so it is passed to `ldtab import` as $<.
build/arborist/organism-tree-old.built: build/arborist/organism-tree-old.owl | build/arborist/nanobot.db
	sqlite3 $| 'DROP TABLE IF EXISTS organism_tree_old'
	ldtab init $| --table organism_tree_old
	ldtab import $| $< --table organism_tree_old
	sqlite3 $| 'ANALYZE organism_tree_old'
	touch $@
# Load an existing subspecies-tree.owl into a fresh `subspecies_tree_old`
# LDTab table for comparison. The OWL file is expected in build/arborist/,
# next to the other comparison inputs.
build/arborist/subspecies-tree-old.built: build/arborist/subspecies-tree-old.owl | build/arborist/nanobot.db
	sqlite3 $| 'DROP TABLE IF EXISTS subspecies_tree_old'
	ldtab init $| --table subspecies_tree_old
	ldtab import $| $< --table subspecies_tree_old
	sqlite3 $| 'ANALYZE subspecies_tree_old'
	touch $@
# Load the stored molecule tree into a fresh `molecule_tree_old` LDTab
# table for comparison, then index the subject, predicate and object
# columns and refresh the statistics.
build/arborist/molecule-tree-old.built: molecule-tree-20240317.owl | build/arborist/nanobot.db
	sqlite3 $| 'DROP TABLE IF EXISTS molecule_tree_old'
	ldtab init $| --table molecule_tree_old
	ldtab import $| $< --table molecule_tree_old
	for col in subject predicate object; do \
	  sqlite3 $| "CREATE INDEX idx_molecule_tree_old_$${col} ON molecule_tree_old($${col})"; \
	done
	sqlite3 $| 'ANALYZE molecule_tree_old'
	touch $@