-
Notifications
You must be signed in to change notification settings - Fork 0
/
biofilter.py
executable file
·1143 lines (1065 loc) · 51.4 KB
/
biofilter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
"""
This script defines a command-line interface (CLI) for Biofilter, a tool for filtering, annotating, and modeling genetic data. It utilizes Python's argparse module to parse command-line arguments and provides custom type handlers for validating input values.
The script defines several custom type handlers to ensure that input arguments are correctly parsed and validated according to the expected formats and ranges:
- `yesno`: Handles boolean-like arguments, accepting values like "yes", "no", "true", "false", "on", or "off".
- `percent`: Handles percentage values, ensuring they are within the range of 0 to 100.
- `zerotoone`: Ensures that the input value is a float between 0.0 and 1.0.
- `basepairs`: Handles values representing base pairs (e.g., "1000" for 1000 base pairs, "1k" for 1000 base pairs, "1m" for 1 million base pairs, etc.).
- `typePZPV`: Handles values related to Paris-zero p-values, accepting "significant", "insignificant", or "ignore".
The CLI allows users to interact with Biofilter, providing options for specifying filtering criteria, annotation types, model generation parameters, and more.
To run the script, users can provide command-line arguments corresponding to the desired Biofilter functionalities, such as filtering genetic data, annotating variants, generating models, and configuring various parameters.
For usage instructions and available command-line options, users can invoke the script with the `-h` or `--help` flag.
Example usage:
python biofilter.py --knowledge loki.db --snp-file data.txt --prefix results --filter gene
For detailed information on each command-line argument, run the script with `--help`; for how option values are parsed in general, please refer to the argparse module documentation.
"""
import argparse
import codecs
import collections
import csv
import itertools
import os
import random
import string
import sys
import time
from loki import loki_db
from classbiofilter import Biofilter
if __name__ == "__main__":
# define the arguments parser
# banner text: used as the parser description and as the first line of --version output
version = "Biofilter version %s" % (Biofilter.getVersionString())
# the automatic -h/--help option is turned off here; the script registers its own
# --help option inside the "Configuration Options" group instead
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    add_help=False,
    description=version,
)
# define custom bool-ish type handler
def yesno(val):
    """Normalize a boolean-like option value to the string 'yes' or 'no'.

    The value is converted to a string, stripped and lower-cased before it is
    compared against the accepted spellings.

    Raises:
        argparse.ArgumentTypeError: if the value is not one of the accepted spellings.
    """
    token = str(val).strip().lower()
    spellings = (
        ('yes', ('1','t','true','y','yes','on')),
        ('no', ('0','f','false','n','no','off')),
    )
    for answer, accepted in spellings:
        if token in accepted:
            return answer
    raise argparse.ArgumentTypeError("'%s' must be yes/on/true/1 or no/off/false/0" % token)
#yesno()
# define custom percentage type handler
def percent(val):
    """Parse a percentage option value into a float between 0 and 100 inclusive.

    The value may carry surrounding whitespace and any number of trailing '%'
    signs, e.g. "50", "12.5%" or " 100% ".

    Returns:
        The percentage as a float.

    Raises:
        argparse.ArgumentTypeError: if the number is negative, above 100, or NaN.
        ValueError: if the text is not a number (argparse reports this as an
            invalid option value).
    """
    text = str(val).strip().lower().rstrip('%')
    number = float(text)
    # written as a positive range test so that NaN, which fails every
    # comparison, is rejected along with negative and > 100 values
    if not (0.0 <= number <= 100.0):
        raise argparse.ArgumentTypeError("'%s' must be between 0 and 100" % (number,))
    return number
#percent()
# define custom [0.0..1.0] type handler
def zerotoone(val):
    """Parse an option value as a float in the closed interval [0.0, 1.0].

    Returns:
        The value as a float.

    Raises:
        argparse.ArgumentTypeError: if the number is below 0.0, above 1.0, or NaN.
        ValueError: if the text is not a number (argparse reports this as an
            invalid option value).
    """
    number = float(val)
    # a positive range test also rejects NaN, which would slip through
    # separate "< 0.0" / "> 1.0" checks because it fails every comparison
    if not (0.0 <= number <= 1.0):
        raise argparse.ArgumentTypeError("'%s' must be between 0.0 and 1.0" % (number,))
    return number
#zerotoone()
# define custom basepairs handler
def basepairs(val):
    """Parse a base-pair count with an optional magnitude suffix into an int.

    The value is stripped and lower-cased. One trailing 'b' is dropped if
    present, then a trailing 'k', 'm' or 'g' multiplies the remaining integer
    by one thousand, one million or one billion respectively; with no such
    suffix the text is parsed as a plain integer.

    Raises:
        ValueError: if the numeric part is not an integer (argparse reports this
            as an invalid option value).
    """
    text = str(val).strip().lower()
    if text.endswith('b'):
        text = text[:-1]
    multipliers = {
        'k': 1000,
        'm': 1000 * 1000,
        'g': 1000 * 1000 * 1000,
    }
    suffix = text[-1:]
    if suffix in multipliers:
        return int(text[:-1]) * multipliers[suffix]
    return int(text)
#basepairs()
# define custom type handler for --paris-zero-p-values
def typePZPV(val):
    """Resolve a --paris-zero-p-values value to 'significant', 'insignificant' or 'ignore'.

    The value is stripped and lower-cased, and any non-empty prefix of exactly
    one of the three choices is accepted (e.g. "sig", "insig", "ig").

    Returns:
        The full name of the matched choice.

    Raises:
        argparse.ArgumentTypeError: if the value is empty, matches no choice,
            or is a prefix of more than one choice (only "i").
    """
    text = str(val).strip().lower()
    choices = ('significant', 'insignificant', 'ignore')
    # every string starts with '', so an empty value would otherwise be
    # silently accepted as the first choice; leave it unmatched instead
    matches = [choice for choice in choices if text and choice.startswith(text)]
    if len(matches) == 1:
        return matches[0]
    if matches:
        raise argparse.ArgumentTypeError("ambiguous value: '%s' could match %s" % (text, ', '.join(matches)))
    raise argparse.ArgumentTypeError("'%s' must be significant, insignificant or ignore" % (text,))
#typePZPV()
# add general configuration section
group = parser.add_argument_group("Configuration Options")
# the parser was created with add_help=False, so --help is registered here
# and therefore listed inside this group
group.add_argument('--help', '-h', action='help', help="show this help message and exit")
# --version prints the Biofilter banner followed by three "<name> version <number>"
# lines filled in from loki_db.Database (LOKI, database driver, database interface)
group.add_argument('--version', action='version', help="show all software version numbers and exit",
    version=version+"""
%9s version %s
%9s version %s
%9s version %s
""" % (
        "LOKI",
        loki_db.Database.getVersionString(),
        loki_db.Database.getDatabaseDriverName(),
        loki_db.Database.getDatabaseDriverVersion(),
        loki_db.Database.getDatabaseInterfaceName(),
        loki_db.Database.getDatabaseInterfaceVersion()
    )
)
# the only positional argument: zero or more configuration file names
group.add_argument('configuration', type=str, metavar='configuration_file', nargs='*', default=None,
    help="a file from which to read additional options"
)
# yes/no switches use nargs='?' with const='yes', so the bare flag means 'yes'
group.add_argument('--report-configuration', '--rc', type=yesno, metavar='yes/no', nargs='?', const='yes', default='no',
    help="output a report of all effective options, including any defaults, in a configuration file format which can be re-input (default: no)"
)
group.add_argument('--report-replication-fingerprint', '--rrf', type=yesno, metavar='yes/no', nargs='?', const='yes', default='no',
    help="include software versions and the knowledge database file's fingerprint values in the configuration report, to ensure the same data is used in replication (default: no)"
)
# the seed is None when the option is absent and '' when it is given without a value
group.add_argument('--random-number-generator-seed', '--rngs', type=str, metavar='seed', nargs='?', const='', default=None,
    help="seed value for the PRNG, or blank to use the system default (default: blank)"
)
# add knowledge database section
# Options that select the prior knowledge database and control how it is used.
# The yes/no switches use nargs='?' with const='yes', so giving the bare flag means 'yes'.
# The --verify-source-* options take a fixed number of values per occurrence and use
# action='append', so each occurrence adds one list of values (None when never given).
group = parser.add_argument_group("Prior Knowledge Options")
group.add_argument('--knowledge', '-k', type=str, metavar='file', #default=argparse.SUPPRESS,
help="the prior knowledge database file to use"
)
group.add_argument('--report-genome-build', '--rgb', type=yesno, metavar='yes/no', nargs='?', const='yes', default='yes',
help="report the genome build version number used by the knowledge database (default: yes)"
)
group.add_argument('--report-gene-name-stats', '--rgns', type=yesno, metavar='yes/no', nargs='?', const='yes', default='no',
help="display statistics on available gene identifier types (default: no)"
)
group.add_argument('--report-group-name-stats', '--runs', type=yesno, metavar='yes/no', nargs='?', const='yes', default='no',
help="display statistics on available group identifier types (default: no)"
)
group.add_argument('--allow-unvalidated-snp-positions', '--ausp', type=yesno, metavar='yes/no', nargs='?', const='yes', default='yes',
help="use unvalidated SNP positions in the knowledge database (default: yes)"
)
group.add_argument('--allow-ambiguous-snps', '--aas', type=yesno, metavar='yes/no', nargs='?', const='yes', default='no',
help="use SNPs which have ambiguous loci in the knowledge database (default: no)"
)
group.add_argument('--allow-ambiguous-knowledge', '--aak', type=yesno, metavar='yes/no', nargs='?', const='yes', default='no',
help="use ambiguous group<->gene associations in the knowledge database (default: no)"
)
# the bare flag selects 'any' (const), while omitting the option leaves it at 'no'
group.add_argument('--reduce-ambiguous-knowledge', '--rak', type=str, metavar='no/implication/quality/any', nargs='?', const='any', default='no',
choices=['no','implication','quality','any'],
help="attempt to reduce ambiguity in the knowledge database using a heuristic strategy, from 'no', 'implication', 'quality' or 'any' (default: no)"
)
group.add_argument('--report-ld-profiles', '--rlp', type=yesno, metavar='yes/no', nargs='?', const='yes', default='no',
help="display the available LD profiles and their properties (default: no)"
)
group.add_argument('--ld-profile', '--lp', type=str, metavar='profile', nargs='?', const=None, default=None,
help="LD profile with which to adjust regions in the knowledge database (default: none)"
)
group.add_argument('--verify-biofilter-version', type=str, metavar='version', default=None,
help="require a specific Biofilter software version to replicate results"
)
group.add_argument('--verify-loki-version', type=str, metavar='version', default=None,
help="require a specific LOKI software version to replicate results"
)
group.add_argument('--verify-source-loader', type=str, metavar=('source','version'), nargs=2, action='append', default=None,
help="require that the knowledge database was built with a specific source loader version"
)
group.add_argument('--verify-source-option', type=str, metavar=('source','option','value'), nargs=3, action='append', default=None,
help="require that the knowledge database was built with a specific source loader option"
)
group.add_argument('--verify-source-file', type=str, metavar=('source','file','date','size','md5'), nargs=5, action='append', default=None,
help="require that the knowledge database was built with a specific source file fingerprint"
)
group.add_argument('--user-defined-knowledge', '--udk', type=str, metavar='file', nargs='+', default=None,
help="file(s) from which to load user-defined knowledge"
)
group.add_argument('--user-defined-filter', '--udf', type=str, metavar='no/group/gene', default='no',
choices=['no','group','gene'],
help="method by which user-defined knowledge will also be applied as a filter on other prior knowledge, from 'no', 'group' or 'gene' (default: no)"
)
# add primary input section
# Primary input options. Each data option accepts one or more values per occurrence
# (nargs='+') and may be repeated (action='append'), so its parsed value is a list of
# lists, or None when the option was never given.
# Lower-case short flags take literal values; upper-case short flags take file names.
group = parser.add_argument_group("Input Data Options")
group.add_argument('--snp', '-s', type=str, metavar='rs#', nargs='+', action='append', #default=argparse.SUPPRESS,
help="input SNPs, specified by RS#"
)
group.add_argument('--snp-file', '-S', type=str, metavar='file', nargs='+', action='append', #default=argparse.SUPPRESS,
help="file(s) from which to load input SNPs"
)
group.add_argument('--position', '-p', type=str, metavar='position', nargs='+', action='append', #default=argparse.SUPPRESS,
help="input positions, specified by chromosome and basepair coordinate"
)
group.add_argument('--position-file', '-P', type=str, metavar='file', nargs='+', action='append', #default=argparse.SUPPRESS,
help="file(s) from which to load input positions"
)
group.add_argument('--gene', '-g', type=str, metavar='name', nargs='+', action='append', #default=argparse.SUPPRESS,
help="input genes, specified by name"
)
group.add_argument('--gene-file', '-G', type=str, metavar='file', nargs='+', action='append', #default=argparse.SUPPRESS,
help="file(s) from which to load input genes"
)
# omitted -> '-' (default); given without a value -> '*' (const)
group.add_argument('--gene-identifier-type', '--git', type=str, metavar='type', nargs='?', const='*', default='-',
help="the default type of any gene identifiers without types, or a special type '=', '-' or '*' (default: '-' for primary labels)"
)
group.add_argument('--allow-ambiguous-genes', '--aag', type=yesno, metavar='yes/no', nargs='?', const='yes', default='no',
help="use ambiguous input gene identifiers by including all possibilities (default: no)"
)
group.add_argument('--gene-search', '--gs', type=str, metavar='text', nargs='+', action='append',
help="find input genes by searching all available names and descriptions"
)
group.add_argument('--region', '-r', type=str, metavar='region', nargs='+', action='append', #default=argparse.SUPPRESS,
help="input regions, specified by chromosome, start and stop positions"
)
group.add_argument('--region-file', '-R', type=str, metavar='file', nargs='+', action='append', #default=argparse.SUPPRESS,
help="file(s) from which to load input regions"
)
# groups use the letter 'u' in their short flags ('-g' is already used by --gene)
group.add_argument('--group', '-u', type=str, metavar='name', nargs='+', action='append', #default=argparse.SUPPRESS,
help="input groups, specified by name"
)
group.add_argument('--group-file', '-U', type=str, metavar='file', nargs='+', action='append', #default=argparse.SUPPRESS,
help="file(s) from which to load input groups"
)
# omitted -> '-' (default); given without a value -> '*' (const)
group.add_argument('--group-identifier-type', '--uit', type=str, metavar='type', nargs='?', const='*', default='-',
help="the default type of any group identifiers without types, or a special type '=', '-' or '*' (default: '-' for primary labels)"
)
group.add_argument('--allow-ambiguous-groups', '--aau', type=yesno, metavar='yes/no', nargs='?', const='yes', default='no',
help="use ambiguous input group identifiers by including all possibilities (default: no)"
)
group.add_argument('--group-search', '--us', type=str, metavar='text', nargs='+', action='append',
help="find input groups by searching all available names and descriptions"
)
group.add_argument('--source', '-c', type=str, metavar='name', nargs='+', action='append', #default=argparse.SUPPRESS,
help="input sources, specified by name"
)
group.add_argument('--source-file', '-C', type=str, metavar='file', nargs='+', action='append', #default=argparse.SUPPRESS,
help="file(s) from which to load input sources"
)
# add alternate input section
# Alternate input options: the same kinds of data options as the primary input section,
# each prefixed with 'alt-'. All use nargs='+' with action='append', so each parsed
# value is a list of lists, or None when the option was never given.
# Short aliases are two-dash abbreviations; the upper-case ones take file names.
group = parser.add_argument_group("Alternate Input Data Options")
group.add_argument('--alt-snp', '--as', type=str, metavar='rs#', nargs='+', action='append', #default=argparse.SUPPRESS,
help="alternate input SNPs, specified by RS#"
)
group.add_argument('--alt-snp-file', '--AS', type=str, metavar='file', nargs='+', action='append', #default=argparse.SUPPRESS,
help="file(s) from which to load alternate input SNPs"
)
group.add_argument('--alt-position', '--ap', type=str, metavar='position', nargs='+', action='append', #default=argparse.SUPPRESS,
help="alternate input positions, specified by chromosome and basepair coordinate"
)
group.add_argument('--alt-position-file', '--AP', type=str, metavar='file', nargs='+', action='append', #default=argparse.SUPPRESS,
help="file(s) from which to load alternate input positions"
)
group.add_argument('--alt-gene', '--ag', type=str, metavar='name', nargs='+', action='append', #default=argparse.SUPPRESS,
help="alternate input genes, specified by name"
)
group.add_argument('--alt-gene-file', '--AG', type=str, metavar='file', nargs='+', action='append', #default=argparse.SUPPRESS,
help="file(s) from which to load alternate input genes"
)
group.add_argument('--alt-gene-search', '--ags', type=str, metavar='text', nargs='+', action='append',
help="find alternate input genes by searching all available names and descriptions"
)
group.add_argument('--alt-region', '--ar', type=str, metavar='region', nargs='+', action='append', #default=argparse.SUPPRESS,
help="alternate input regions, specified by chromosome, start and stop positions"
)
group.add_argument('--alt-region-file', '--AR', type=str, metavar='file', nargs='+', action='append', #default=argparse.SUPPRESS,
help="file(s) from which to load alternate input regions"
)
group.add_argument('--alt-group', '--au', type=str, metavar='name', nargs='+', action='append', #default=argparse.SUPPRESS,
help="alternate input groups, specified by name"
)
group.add_argument('--alt-group-file', '--AU', type=str, metavar='file', nargs='+', action='append', #default=argparse.SUPPRESS,
help="file(s) from which to load alternate input groups"
)
group.add_argument('--alt-group-search', '--aus', type=str, metavar='text', nargs='+', action='append',
help="find alternate input groups by searching all available names and descriptions"
)
group.add_argument('--alt-source', '--ac', type=str, metavar='name', nargs='+', action='append', #default=argparse.SUPPRESS,
help="alternate input sources, specified by name"
)
group.add_argument('--alt-source-file', '--AC', type=str, metavar='file', nargs='+', action='append', #default=argparse.SUPPRESS,
help="file(s) from which to load alternate input sources"
)
# add positional section
# Options describing the coordinate conventions of position/region data and how
# positions and regions are matched against each other.
group = parser.add_argument_group("Positional Matching Options")
# both build-version options default to None (no build declared)
group.add_argument('--grch-build-version', '--gbv', type=int, metavar='version', default=None,
help="the GRCh# human reference genome build version of position and region inputs",
)
group.add_argument('--ucsc-build-version', '--ubv', type=int, metavar='version', default=None,
help="the UCSC hg# human reference genome build version of position and region inputs",
)
group.add_argument('--coordinate-base', '--cb', type=int, metavar='offset', default=1,
help="the coordinate base for position and region inputs and outputs (default: 1)",
)
group.add_argument('--regions-half-open', '--rho', type=yesno, metavar='yes/no', nargs='?', const='yes', default='no',
help="whether input and output regions are 'half-open' intervals and should not include their end coordinate (default: no)",
)
# basepairs accepts plain integers and k/m/g(+optional b) suffixes
group.add_argument('--region-position-margin', '--rpm', type=basepairs, metavar='bases', default=0,
help="number of bases beyond the bounds of known regions where positions should still be matched (default: 0)"
)
# the next two options are left as None here; per the inline notes their effective
# defaults are assigned later, together (that code is not part of this section)
group.add_argument('--region-match-percent', '--rmp', type=percent, metavar='percentage', default=None, # default set later, with -bases
help="minimum percentage of overlap between two regions to consider them a match (default: 100)"
)
group.add_argument('--region-match-bases', '--rmb', type=basepairs, metavar='bases', default=None, # default set later, with -percent
help="minimum number of bases of overlap between two regions to consider them a match (default: 0)"
)
# add modeling section
# Options controlling how models are generated, limited and ordered.
group = parser.add_argument_group("Model-Building Options")
# both the default and the bare-flag value are 0, which the help text defines as unlimited
group.add_argument('--maximum-model-count', '--mmc', type=int, metavar='count', nargs='?', const=0, default=0,
help="maximum number of models to generate, or < 1 for unlimited (default: unlimited)"
)
group.add_argument('--alternate-model-filtering', '--amf', type=yesno, metavar='yes/no', nargs='?', const='yes', default='no',
help="apply primary input filters to only one side of generated models (default: no)"
)
group.add_argument('--all-pairwise-models', '--apm', type=yesno, metavar='yes/no', nargs='?', const='yes', default='no',
help="generate all comprehensive pairwise models without regard to any prior knowledge (default: no)"
)
group.add_argument('--maximum-model-group-size', '--mmgs', type=int, metavar='size', default=30,
help="maximum size of a group to use for knowledge-supported models, or < 1 for unlimited (default: 30)"
)
group.add_argument('--minimum-model-score', '--mms', type=int, metavar='score', default=2,
help="minimum implication score for knowledge-supported models (default: 2)"
)
group.add_argument('--sort-models', '--sm', type=yesno, metavar='yes/no', nargs='?', const='yes', default='yes',
help="output knowledge-supported models in order of descending score (default: yes)"
)
# add PARIS section
# Options for the PARIS analysis: significance thresholds, permutation settings
# and the result files to load.
group = parser.add_argument_group("PARIS Options")
# p-value options are validated by zerotoone, i.e. must lie within 0.0..1.0
group.add_argument('--paris-p-value', '--ppv', type=zerotoone, metavar='p-value', default=0.05,
help="maximum p-value of input results to be considered significant (default: 0.05)"
)
# typePZPV accepts unambiguous prefixes of significant/insignificant/ignore
group.add_argument('--paris-zero-p-values', '--pzpv', type=typePZPV, metavar='sig/insig/ignore', default='ignore',
help="how to consider input result p-values of zero (default: ignore)"
)
group.add_argument('--paris-max-p-value', '--pmpv', type=zerotoone, metavar='p-value', default=None,
help="maximum meaningful permutation p-value (default: none)"
)
group.add_argument('--paris-enforce-input-chromosome', '--peic', type=yesno, metavar='yes/no', nargs='?', const='yes', default='yes',
help="limit input result SNPs to positions on the specified chromosome (default: yes)"
)
group.add_argument('--paris-permutation-count', '--ppc', type=int, metavar='number', default=1000,
help="number of permutations to perform on each group and gene (default: 1000)"
)
group.add_argument('--paris-bin-size', '--pbs', type=int, metavar='number', default=10000,
help="ideal number of features per bin (default: 10000)"
)
# result files: nargs='+' with action='append' gives a list of lists, or None when never given
group.add_argument('--paris-snp-file', '--PS', type=str, metavar='file', nargs='+', action='append', #default=argparse.SUPPRESS,
help="file(s) from which to load SNP results"
)
group.add_argument('--paris-position-file', '--PP', type=str, metavar='file', nargs='+', action='append', #default=argparse.SUPPRESS,
help="file(s) from which to load position results"
)
group.add_argument('--paris-details', '--pd', type=yesno, metavar='yes/no', nargs='?', const='yes', default='no',
help="generate the PARIS detail report (default: no)"
)
# add output section
# Options controlling logging, output file naming and which analyses are written.
# The yes/no switches use nargs='?' with const='yes', so the bare flag means 'yes'.
group = parser.add_argument_group("Output Options")
group.add_argument('--quiet', '-q', type=yesno, metavar='yes/no', nargs='?', const='yes', default='no',
    help="don't print any warnings or log messages to <stdout> (default: no)"
)
group.add_argument('--verbose', '-v', type=yesno, metavar='yes/no', nargs='?', const='yes', default='no',
    help="print additional informational log messages to <stdout> (default: no)"
)
group.add_argument('--prefix', type=str, metavar='prefix', default='biofilter',
    help="prefix to use for all output filenames; may contain path components (default: 'biofilter')"
)
group.add_argument('--overwrite', type=yesno, metavar='yes/no', nargs='?', const='yes', default='no',
    help="overwrite any existing output files (default: no)",
)
group.add_argument('--stdout', type=yesno, metavar='yes/no', nargs='?', const='yes', default='no',
    help="display all output data directly on <stdout> rather than writing to any files (default: no)"
)
group.add_argument('--report-invalid-input', '--rii', type=yesno, metavar='yes/no', nargs='?', const='yes', default='no',
    help="report invalid input data lines in a separate output file for each type (default: no)"
)
# each occurrence of --filter/--annotate/--model contributes one list of type names
# (nargs='+' with action='append'); the parsed value is None when never given
group.add_argument('--filter', '-f', type=str, metavar='type', nargs='+', action='append',
    help="data types or columns to include in the filtered output"
)
group.add_argument('--annotate', '-a', type=str, metavar='type', nargs='+', action='append',
    help="data types or columns to include in the annotated output"
)
group.add_argument('--model', '-m', type=str, metavar='type', nargs='+', action='append',
    help="data types or columns to include in the output models"
)
# validated with yesno like every other yes/no switch, so spellings such as
# "true", "on" or "YES" are normalized to 'yes' and anything else is rejected
group.add_argument('--paris', type=yesno, metavar='yes/no', nargs='?', const='yes', default='no',
    help="perform a PARIS analysis with the provided input data (default: no)"
)
# add hidden options
# These are registered on the parser itself with help=argparse.SUPPRESS, so they are
# accepted but never shown in the help output.
# --end-of-line is the marker that parseCFile appends after each configuration file line.
parser.add_argument('--end-of-line', action='store_true', help=argparse.SUPPRESS)
parser.add_argument('--allow-duplicate-output', '--ado', type=yesno, metavar='yes/no', nargs='?', const='yes', default='no', help=argparse.SUPPRESS)
# debugging switches; each is False unless the flag is given
parser.add_argument('--debug-logic', action='store_true', help=argparse.SUPPRESS)
parser.add_argument('--debug-query', action='store_true', help=argparse.SUPPRESS)
parser.add_argument('--debug-profile', action='store_true', help=argparse.SUPPRESS)
# if there are no arguments, just print usage and exit
# with no command-line arguments at all: show the version banner and the
# one-line usage summary, then exit with status 2
if len(sys.argv) < 2:
    print (version)
    # a bare `print` statement is a no-op expression under Python 3, so the
    # blank separator lines are printed explicitly
    print ("")
    parser.print_usage()
    print ("")
    print ("Use -h for details.")
    sys.exit(2)
#if no args
"""
This part of the script handles the generation of various reports based on the provided options and configurations:
1. **OrderedNamespace**: Defines a custom namespace class that preserves the order of attribute additions.
2. **cfDialect**: Defines a custom CSV dialect named `cfDialect` for configuration files, ensuring compatibility with quoted substrings.
3. **parseCFile**: A recursive function to parse configuration files, supporting 'include' directives and cyclic include detection. It populates the `OrderedNamespace` with parsed arguments.
4. **Parsing Command Line for Configuration Files**: Parses command-line arguments to identify configuration files and re-parses them to override the previous configurations.
5. **Identifying Output Paths**: Determines the paths for various types of reports, filtering results, annotations, and models based on user-specified options.
6. **Verification and Error Handling**: Verifies the uniqueness, writability, and non-existence of output files. It also handles errors related to conflicting file paths and overwriting.
7. **Attaching Knowledge Database**: Attaches a knowledge database file if provided in the options.
8. **Verifying Replication Fingerprint**: Verifies the replication fingerprint, including Biofilter and LOKI versions, source loader versions, options, and file hashes.
9. **Processing Reports**: Writes various reports based on user options, such as configuration file details, gene name statistics, group name statistics, and LD profiles.
10. **Output Helper Functions**: Defines utility functions to encode strings, lines, and rows into UTF-8 format for writing to files.
11. **Generating Reports**: Iterates through different types of reports, writes them to respective files, and logs the process.
This part of the script is responsible for generating and writing various reports based on user-defined configurations and input data.
"""
# define an argparse.Namespace that remembers the order in which attributes are added
class OrderedNamespace(argparse.Namespace):
    """An argparse.Namespace that remembers the order in which attributes were first set.

    Iterating over an instance yields the attribute names in first-assignment
    order. The order is tracked in a collections.OrderedDict stored under the
    '__OrderedDict' key of the instance __dict__; re-assigning an existing
    attribute keeps its original position.
    """

    def __setattr__(self, name, value):
        """Set the attribute and record its name if this is its first assignment."""
        if name != '__OrderedDict':
            if '__OrderedDict' not in self.__dict__:
                self.__dict__['__OrderedDict'] = collections.OrderedDict()
            self.__dict__['__OrderedDict'][name] = None
        super(OrderedNamespace, self).__setattr__(name, value)

    def __delattr__(self, name):
        """Delete the attribute and forget its position.

        Raises:
            AttributeError: if the attribute does not exist.
        """
        # delete the real attribute first, so that a missing name raises the
        # usual AttributeError rather than a KeyError from the order tracking
        super(OrderedNamespace, self).__delattr__(name)
        if name != '__OrderedDict' and '__OrderedDict' in self.__dict__:
            self.__dict__['__OrderedDict'].pop(name, None)

    def __iter__(self):
        """Iterate over attribute names in first-assignment order."""
        # the tracking dict does not exist until the first attribute is set
        return iter(self.__dict__.get('__OrderedDict') or ())
#OrderedNamespace
# define a CSV dialect for conf files (to support "quoted substrings")
class cfDialect(csv.Dialect):
    """CSV dialect used to split configuration file lines into words.

    Words are separated by spaces, and a double-quoted substring is kept
    together as a single word.
    """
    # word separation: one space delimits fields, and spaces following a
    # delimiter are skipped
    delimiter = ' '
    skipinitialspace = True
    # quoting: double quotes group a substring into one field
    quotechar = '"'
    quoting = csv.QUOTE_MINIMAL
    # special characters are escaped with a backslash rather than by doubling the quote
    doublequote = False
    escapechar = '\\'
    lineterminator = '\n'
#cfDialect
# define a recursive function to parse conf files (to support 'include')
# parse an empty argument list into an OrderedNamespace, so that `options`
# starts out holding the default value of every option
options = parser.parse_args([], OrderedNamespace())
# absolute paths of the configuration files currently being parsed, outermost
# first; parseCFile uses it to detect include cycles
cfStack = []
def parseCFile(cfName):
    """Parse one configuration file into the shared `options` namespace.

    Each non-blank line that does not start with '#' is split into words with
    the cfDialect CSV dialect (tabs are treated as spaces). The first word is
    turned into an option name by lower-casing it, replacing '_' with '-' and
    prefixing '--'. An 'include' line parses each named file recursively,
    immediately; every other line is collected, followed by an '--end-of-line'
    marker, and the collected arguments are handed to the argument parser once
    the whole file has been read.

    Args:
        cfName: path of the configuration file, or '-' to read standard input.

    Raises:
        SystemExit: if configuration files include each other in a loop, or if
            the argument parser rejects the file's options.
        Exception: if a line carries extra words that no option consumed.
    """
    # check for cycles
    cfAbs = ('<stdin>' if cfName == '-' else os.path.abspath(cfName))
    if cfAbs in cfStack:
        sys.exit("ERROR: configuration files include eachother in a loop! %s" % (' -> '.join(cfStack + [cfAbs])))
    cfStack.append(cfAbs)

    # read the file; recurse for includes, store the rest
    cfHandle = (sys.stdin if cfName == '-' else open(cfName,'r'))
    cfArgs = list()
    try:
        cfStream = (line.replace('\t',' ').strip() for line in cfHandle)
        cfLines = (line for line in cfStream if line and not line.startswith('#'))
        cfReader = csv.reader(cfLines, dialect=cfDialect)
        for line in cfReader:
            line[0] = '--' + line[0].lower().replace('_','-')
            if line[0] == '--include':
                for includeName in line[1:]:
                    parseCFile(includeName)
            else:
                cfArgs.extend(line)
                cfArgs.append('--end-of-line')
    finally:
        # close the file even when reading or a nested include fails;
        # standard input is never closed
        if cfHandle is not sys.stdin:
            cfHandle.close()

    # try to parse the collected args
    try:
        parser.parse_args(args=cfArgs, namespace=options)
        # if extra arguments are given to an otherwise correct option,
        # they'll end up in 'configuration' because it accepts nargs=*
        if options.configuration:
            raise Exception("unexpected argument(s): %s" % (' '.join(options.configuration)))
    except BaseException:
        # BaseException on purpose: argparse reports errors by raising
        # SystemExit, and the file name should be shown in that case too
        print ("(in configuration file '%s')" % cfName)
        raise

    # pop the stack and return
    assert(cfStack[-1] == cfAbs)
    cfStack.pop()
#parseCFile()
# parse the command line for any configuration files, then re-parse to override them
# first pass over the command line: only used to find the configuration file
# names, each of which is parsed into `options`
commandLine = parser.parse_args()
for cfName in commandLine.configuration:
    parseCFile(cfName)
# second pass: apply the command line on top of `options`, so that it overrides
# anything read from configuration files
parser.parse_args(namespace=options)
bio = Biofilter(options)
# shared empty list, used as the fallback when an option was never given
empty = []
# identify all the reports we need to output
# typeOutputPath maps an output category to an ordered {label: file path} mapping
typeOutputPath = collections.OrderedDict()
typeOutputPath['report'] = collections.OrderedDict()
# one row per optional report: (option value, report label, file name suffix)
for reportFlag, reportLabel, reportSuffix in (
    (options.report_configuration, 'configuration', '.configuration'),
    (options.report_gene_name_stats, 'gene name statistics', '.gene-names'),
    (options.report_group_name_stats, 'group name statistics', '.group-names'),
    (options.report_ld_profiles, 'LD profiles', '.ld-profiles'),
):
    if reportFlag == 'yes':
        typeOutputPath['report'][reportLabel] = options.prefix + reportSuffix
# define invalid input handlers, if requested
typeOutputPath['invalid'] = collections.OrderedDict()
cb = collections.defaultdict(bool)
# cbLog maps an input type to the list of lines collected for its invalid-input report
cbLog = collections.OrderedDict()
def cbMake(modtype):
    """Return a callback(line, err) that records one invalid input line for `modtype`.

    The callback appends two entries to cbLog[modtype]: a '# <error>' comment
    line (a generic message when `err` is empty) and the offending line with
    trailing whitespace removed.
    """
    def logInvalid(line, err):
        cbLog[modtype].extend(["# %s" % (err or "(unknown error)"), str(line).rstrip()])
    return logInvalid
if options.report_invalid_input == 'yes':
    # one report per input type, for both the primary and the 'alt-' inputs
    for itype in ['SNP','position','region','gene','group','source']:
        for mod in ['','alt-']:
            typeOutputPath['invalid'][mod+itype] = options.prefix + '.invalid.' + mod+itype.lower()
            cbLog[mod+itype] = list()
    # user-defined knowledge has no 'alt-' variant
    for itype in ['userknowledge']:
        typeOutputPath['invalid'][itype] = options.prefix + '.invalid.' + itype.lower()
        cbLog[itype] = list()
#if report invalid input
# register an output file for every non-empty filter request, keyed by its tuple of types
typeOutputPath['filter'] = collections.OrderedDict()
for types in (options.filter or empty):
    if not types:
        # an empty filter request produces no output
        continue
    typeOutputPath['filter'][tuple(types)] = options.prefix + '.' + '-'.join(types)
#foreach requested filter
# identify all the annotation results we need to output
typeOutputPath['annotation'] = collections.OrderedDict()
# the first kind of primary user input that was supplied (checked in this fixed
# order) is prepended as an implicit leading type to every annotation's left side
userInputType = []
for inputGiven, inputName in (
    (options.snp or options.snp_file, 'snpinput'),
    (options.position_file or options.position, 'positioninput'),
    (options.gene or options.gene_file or options.gene_search, 'geneinput'),
    (options.region or options.region_file, 'regioninput'),
    (options.group or options.group_file or options.group_search, 'groupinput'),
    (options.source or options.source_file, 'sourceinput'),
):
    if inputGiven:
        userInputType = [inputName]
        break
for types in (options.annotate or empty):
    n = types.count(':')
    if n > 1:
        sys.exit("ERROR: cannot annotate '%s', only two sets of outputs are allowed\n" % (' '.join(types),))
    if n:
        # explicit "left : right" form
        i = types.index(':')
        typesF = userInputType + types[:i]
        typesA = types[i+1:]
    else:
        # no colon: the first type is the starting point, the rest are annotations
        typesF = userInputType + types[0:1]
        typesA = types[1:]
    if typesF and typesA:
        # name the file after the requested types only: skip exactly as many
        # leading entries as were prepended (none when there is no user input);
        # a fixed [1:] slice used to drop the first requested type in that case
        namedF = typesF[len(userInputType):]
        typeOutputPath['annotation'][(tuple(typesF),tuple(typesA))] = options.prefix + '.' + '-'.join(namedF) + '.' + '-'.join(typesA)
    elif typesF:
        bio.warn("WARNING: annotating '%s' is equivalent to filtering '%s'\n" % (' '.join(types),' '.join(typesF)))
        typeOutputPath['filter'][tuple(typesF)] = options.prefix + '.' + '-'.join(typesF)
    elif typesA:
        sys.exit("ERROR: cannot annotate '%s' with no starting point\n" % (' '.join(types),))
    # otherwise both sides are empty and the request is ignored
#foreach requested annotation
# register an output file for every requested model, keyed by (left types, right types)
typeOutputPath['models'] = collections.OrderedDict()
for types in (options.model or empty):
    n = types.count(':')
    if n > 1:
        sys.exit("ERROR: cannot model '%s', only two sets of outputs are allowed\n" % (' '.join(types),))
    if n:
        i = types.index(':')
        typesL, typesR = types[:i], types[i+1:]
    else:
        # no colon: the same types are used on both sides of the model
        typesL = typesR = types
    if not typesL and not typesR:
        # ignore empty models
        continue
    if not typesL or not typesR:
        sys.exit("ERROR: cannot model '%s', both sides require at least one output type\n" % ' '.join(types))
    # identical sides are named once; differing sides are both spelled out
    modelPath = options.prefix + '.' + '-'.join(typesL)
    if typesL != typesR:
        modelPath = modelPath + '.' + '-'.join(typesR)
    typeOutputPath['models'][(tuple(typesL),tuple(typesR))] = modelPath + '.models'
#foreach requested model
# register the PARIS summary and detail files, each only if requested
typeOutputPath['paris'] = collections.OrderedDict()
for parisFlag, parisKey, parisSuffix in (
    (options.paris, 'summary', '.paris-summary'),
    (options.paris_details, 'detail', '.paris-detail'),
):
    if parisFlag == 'yes':
        typeOutputPath['paris'][parisKey] = options.prefix + parisSuffix
# verify that all output files are unique, writeable and nonexistent (unless overwriting)
# typeOutputInfo mirrors typeOutputPath, mapping each output to (label, path, handle);
# pathUsed remembers which label reserved each path so collisions can be reported
typeOutputInfo = dict()
pathUsed = dict()
for outtype,outputPath in typeOutputPath.items():
    typeOutputInfo[outtype] = collections.OrderedDict()
    for output,path in outputPath.items():
        # build a human-readable label for messages
        if outtype == 'report':
            label = "%s report" % (output,)
        elif outtype == 'invalid':
            label = "invalid %s input report" % (output,)
        elif outtype == 'filter':
            label = "'%s' filter" % (" ".join(output),)
        elif outtype == 'annotation':
            # hide only the implicit input type(s) that were prepended to the
            # left side; a fixed [1:] slice used to drop a real type when no
            # user input was given
            label = "'%s : %s' annotation" % (" ".join(output[0][len(userInputType):])," ".join(output[1]))
        elif outtype == 'models':
            if output[0] == output[1]:
                label = "'%s' models" % (" ".join(output[0]),)
            else:
                label = "'%s : %s' models" % (" ".join(output[0])," ".join(output[1]))
        elif outtype == 'paris':
            label = "PARIS %s report" % (output,)
        else:
            raise Exception("unexpected output type")

        if options.debug_logic == 'yes':
            bio.warn("%s will be written to '%s'\n" % (label,('<stdout>' if options.stdout == 'yes' else path)))

        # in stdout mode every output shares one stream, so no path checks apply
        if options.stdout == 'yes':
            path = '<stdout>'
        elif path in pathUsed:
            sys.exit("ERROR: cannot write %s to '%s', file is already reserved for %s\n" % (label,path,pathUsed[path]))
        elif os.path.exists(path):
            if options.overwrite == 'yes':
                bio.warn("WARNING: %s file '%s' already exists and will be overwritten\n" % (label,path))
            else:
                sys.exit("ERROR: %s file '%s' already exists, must specify --overwrite or a different --prefix\n" % (label,path))
        pathUsed[path] = label

        # invalid-input reports get no handle here (None) unless writing to stdout;
        # everything else is opened for binary writing right away
        # NOTE(review): writers in this file pass encoded bytes to these handles;
        # confirm sys.stdout accepts bytes on the targeted Python version
        if options.stdout == 'yes':
            handle = sys.stdout
        elif outtype != 'invalid':
            handle = open(path,'wb')
        else:
            handle = None
        typeOutputInfo[outtype][output] = (label,path,handle)
        if outtype == 'invalid':
            # route this input type's errors into its log
            cb[output] = cbMake(output)
    #foreach output of type
#foreach output type
# attach the knowledge file, if provided; a path that does not exist as given
# is retried relative to the directory containing this script
if options.knowledge:
    dbPath = options.knowledge
    if not os.path.exists(dbPath):
        # use the working directory itself: taking dirname() of it (as before)
        # yields its parent, which made both the comparison below and the
        # error message refer to the wrong directory
        cwdDir = os.path.realpath(os.path.abspath(os.getcwd()))
        myDir = os.path.dirname(os.path.realpath(os.path.abspath(__file__)))
        if not os.path.samefile(cwdDir, myDir):
            dbPath = os.path.join(myDir, options.knowledge)
            if not os.path.exists(dbPath):
                sys.exit("ERROR: knowledge database file '%s' not found in '%s' or '%s'" % (options.knowledge, cwdDir, myDir))
        else:
            # both locations are the same directory, so there is nowhere else to look
            sys.exit("ERROR: knowledge database file '%s' not found" % (options.knowledge))
    bio.attachDatabaseFile(dbPath)
#if knowledge
# verify the replication fingerprint, if requested
# sourceVerify maps source name -> [loader version, {option: value}, {file: (date, size, md5)}];
# each slot stays None until the configuration supplies something for it
sourceVerify = collections.defaultdict(lambda: [None,None,None])
for source,version in (options.verify_source_loader or empty):
    sourceVerify[source][0] = version
for source,option,value in (options.verify_source_option or empty):
    if not sourceVerify[source][1]:
        sourceVerify[source][1] = dict()
    sourceVerify[source][1][option] = value
for source,file,date,size,md5 in (options.verify_source_file or empty):
    if not sourceVerify[source][2]:
        sourceVerify[source][2] = dict()
    sourceVerify[source][2][file] = (date,int(size),md5)
if sourceVerify or options.verify_biofilter_version or options.verify_loki_version:
    bio.logPush("verifying replication fingerprint ...\n")
    if options.verify_biofilter_version and (options.verify_biofilter_version != Biofilter.getVersionString()):
        sys.exit("ERROR: configuration requires Biofilter version %s, but this is version %s\n" % (options.verify_biofilter_version, Biofilter.getVersionString()))
    if options.verify_loki_version and (options.verify_loki_version != loki_db.Database.getVersionString()):
        sys.exit("ERROR: configuration requires LOKI version %s, but this is version %s\n" % (options.verify_loki_version, loki_db.Database.getVersionString()))
    for source in sorted(sourceVerify):
        verify = sourceVerify[source]
        sourceID = bio._loki.getSourceID(source)
        if not sourceID:
            sys.exit("ERROR: cannot verify %s fingerprint, knowledge database contains no such source\n" % (source,))
        version = bio._loki.getSourceIDVersion(sourceID)
        if verify[0] and verify[0] != version:
            sys.exit("ERROR: configuration requires %s loader version %s, but knowledge database reports version %s\n" % (source,verify[0],version))
        if verify[1]:
            # keep the database's loader options in their own name: binding
            # them to 'options' (as before) replaced the program's option
            # namespace and broke every later options.<attribute> access
            sourceOptions = bio._loki.getSourceIDOptions(sourceID)
            for opt,val in verify[1].items():
                if opt not in sourceOptions or val != sourceOptions[opt]:
                    sys.exit("ERROR: configuration requires %s loader option %s = %s, but knowledge database reports setting = %s\n" % (source,opt,val,sourceOptions.get(opt)))
        if verify[2]:
            files = bio._loki.getSourceIDFiles(sourceID)
            for file,meta in verify[2].items():
                if file not in files:
                    sys.exit("ERROR: configuration requires a specific fingerprint for %s file '%s', but knowledge database reports no such file\n" % (source,file))
                # the modification date (meta[0]) is deliberately not compared:
                # size and hash should be sufficient comparisons, and some sources
                # (KEGG,PharmGKB) don't provide data file timestamps anyway
                elif meta[1] != files[file][1]:
                    sys.exit("ERROR: configuration requires %s file '%s' size %s, but knowledge database reports %s\n" % (source,file,meta[1],files[file][1]))
                elif meta[2] != files[file][2]:
                    sys.exit("ERROR: configuration requires %s file '%s' hash '%s', but knowledge database reports '%s'\n" % (source,file,meta[2],files[file][2]))
    #foreach source
    bio.logPop("... OK\n")
#if verify replication fingerprint
# fill in defaults for the region match thresholds
basesGiven = options.region_match_bases is not None
percentGiven = options.region_match_percent is not None
if basesGiven and not percentGiven:
    # only a base count was specified: the percent threshold stays unset
    bio.warn("WARNING: ignoring default region match percent (100) in favor of user-specified region match bases (%d)\n" % options.region_match_bases)
    options.region_match_percent = None
else:
    # whichever threshold is still unset takes its default (0 bases, 100 percent)
    if not basesGiven:
        options.region_match_bases = 0
    if not percentGiven:
        options.region_match_percent = 100.0
#if rmb/rmp
# seed the random number generator, if a seed option was given
rawSeed = options.random_number_generator_seed
if rawSeed is not None:
    try:
        seed = int(rawSeed)
    except ValueError:
        # not an integer: use the text itself as the seed, or None when it is empty
        seed = rawSeed or None
    seedText = '<system default>' if seed is None else repr(seed)
    bio.warn("random number generator seed: %s\n" % (seedText,))
    random.seed(seed)
#if rngs
# look up the knowledge database's genome build and announce it if asked to
dbBuilds = bio.getDatabaseGenomeBuilds()
grchBuildDB,ucscBuildDB = dbBuilds
if options.report_genome_build == 'yes':
    # an unknown build number is shown as '?'
    dbBuildText = (grchBuildDB or '?', ucscBuildDB or '?')
    bio.warn("knowledge database genome build: GRCh%s / UCSC hg%s\n" % dbBuildText)
#if genome build
# resolve the genome build of the user's input from the two build options;
# it is announced whenever at least one of the two resolved
userBuilds = bio.getInputGenomeBuilds(options.grch_build_version, options.ucsc_build_version)
grchBuildUser,ucscBuildUser = userBuilds
if grchBuildUser or ucscBuildUser:
    userBuildText = (grchBuildUser or '?', ucscBuildUser or '?')
    bio.warn("user input genome build: GRCh%s / UCSC hg%s\n" % userBuildText)
# define output helper functions; each returns UTF-8 encoded bytes
utf8 = codecs.getencoder('utf8')

def encodeString(string):
    """Return 'string' encoded as UTF-8."""
    return utf8(string)[0]

def encodeLine(line, term="\n"):
    """Return 'line' followed by the terminator 'term', encoded as UTF-8."""
    return utf8("%s%s" % (line,term))[0]

def encodeRow(row, term="\n", delim="\t"):
    """Return the columns of 'row' joined by 'delim' and ended by 'term', as UTF-8.

    Columns that are already str are used as-is, None becomes an empty
    field, and anything else is converted with str().  'row' may be any
    iterable, including a generator.
    """
    # identity test for None: '== None' would invoke a column's own __eq__
    fields = (col if isinstance(col,str) else ('' if col is None else str(col)) for col in row)
    return utf8("%s%s" % (delim.join(fields),term))[0]
# process reports
# each entry of typeOutputInfo['report'] unpacks to (label, path, output handle);
# everything written is first encoded by encodeLine()/encodeRow()
for report,info in typeOutputInfo['report'].items():
label,path,outfile = info
bio.logPush("writing %s to '%s' ...\n" % (label,path))
if report == 'configuration':
# header comments: generation time plus the Biofilter and LOKI versions
outfile.write(encodeLine("# Biofilter configuration file"))
outfile.write(encodeLine("# generated %s" % time.strftime('%a, %d %b %Y %H:%M:%S')))
outfile.write(encodeLine("# Biofilter version %s" % Biofilter.getVersionString()))
outfile.write(encodeLine("# LOKI version %s" % loki_db.Database.getVersionString()))
outfile.write(encodeLine(""))
# optional replication fingerprint: VERIFY_* directives for the program
# versions and for each source returned by bio.getSourceFingerprints()
if options.report_replication_fingerprint == 'yes':
outfile.write(encodeLine("%-35s \"%s\"" % ('VERIFY_BIOFILTER_VERSION', Biofilter.getVersionString(),)))
outfile.write(encodeLine("%-35s \"%s\"" % ('VERIFY_LOKI_VERSION', loki_db.Database.getVersionString(),)))
for source,fingerprint in bio.getSourceFingerprints().items():
# fingerprint[0] is written as the loader version; [1] and [2] are iterated in sorted key order
outfile.write(encodeLine("%-35s %s \"%s\"" % ('VERIFY_SOURCE_LOADER',source,fingerprint[0])))
for srcopt in sorted(fingerprint[1]):
# the directive prefix is written without a terminator so the value(s) finish the same line
outfile.write(encodeLine("%-35s %s %s " % ('VERIFY_SOURCE_OPTION',source,srcopt), term=""))
outfile.write(encodeRow(fingerprint[1][srcopt], delim=" "))
for srcfile in sorted(fingerprint[2]):
outfile.write(encodeLine("%-35s %s \"%s\" " % ('VERIFY_SOURCE_FILE',source,srcfile), term=""))
# every element of the file's fingerprint is written in double quotes
outfile.write(encodeRow((('"%s"' % col) for col in fingerprint[2][srcfile]), delim=" "))
outfile.write(encodeLine(""))
# write the options themselves, one directive per line
for opt in options:
# the configuration file list and the VERIFY_SOURCE_* inputs are not echoed here
if opt in ('configuration','verify_source_loader','verify_source_option','verify_source_file') or not hasattr(options, opt):
continue
val = getattr(options, opt)
if type(val) == bool: # --end-of-line, --debug-*
continue
# directive name: upper-cased, dashes to underscores, left-justified to 35 columns
opt = "%-35s" % opt.upper().replace('-','_')
# three possibilities: simple value, list of simple values, or list of lists of simple values
if isinstance(val,list) and len(val) and isinstance(val[0],list):
# list of lists: one directive line per sub-list (name only when the sub-list is empty)
for subvals in val:
if len(subvals):
outfile.write(encodeRow(itertools.chain([opt],subvals), delim=" "))
else:
outfile.write(encodeLine(opt))
elif isinstance(val,list):
# flat list: a single directive line (name only when the list is empty)
if len(val):
outfile.write(encodeRow(itertools.chain([opt],val), delim=" "))
else:
outfile.write(encodeLine(opt))
elif val != None:
# simple value; options whose value is None are not written at all
outfile.write(encodeRow([opt,val], delim=" "))
#foreach option
elif report == 'gene name statistics':
# header row followed by one row per item yielded by the generator
outfile.write(encodeRow(['#type','names','unique','ambiguous']))
for row in bio.generateGeneNameStats():
outfile.write(encodeRow(row))
elif report == 'group name statistics':
outfile.write(encodeRow(['#type','names','unique','ambiguous']))
for row in bio.generateGroupNameStats():
outfile.write(encodeRow(row))
elif report == 'LD profiles':
outfile.write(encodeRow(['#ldprofile','description','metric','value']))
for row in bio.generateLDProfiles():
outfile.write(encodeRow(row))
else:
raise Exception("unexpected report type")
#which report
# close real files only; sys.stdout is left open
if outfile != sys.stdout:
outfile.close()
bio.logPop("... OK\n")
#foreach report
# load each user-defined knowledge file, reporting bad lines via the 'userknowledge' handler
for path in (options.user_defined_knowledge or empty):
    onBadKnowledge = cb['userknowledge']
    bio.loadUserKnowledgeFile(path, options.gene_identifier_type, errorCallback=onBadKnowledge)
# apply the user-defined knowledge filter unless it is switched off; the flag
# passed along is True only for the 'group' setting
udfMode = options.user_defined_filter
if udfMode != 'no':
    bio.applyUserKnowledgeFilter(udfMode == 'group')
# apply primary filters
# SNPs given directly on the command line / in configuration (':'-separated text)
for snpList in (options.snp or empty):
    onBadSNP = cb['SNP']
    rsIDs = bio.generateRSesFromText(snpList, separator=':', errorCallback=onBadSNP)
    bio.intersectInputSNPs('main', rsIDs, errorCallback=onBadSNP)
# SNPs listed in files
for snpFileList in (options.snp_file or empty):
    onBadSNP = cb['SNP']
    rsIDs = bio.generateRSesFromRSFiles(snpFileList, errorCallback=onBadSNP)
    bio.intersectInputSNPs('main', rsIDs, errorCallback=onBadSNP)
# positions given as text: parse, pass through generateLiftOverLoci with the
# user and database UCSC builds, then intersect
for positionList in (options.position or empty):
    onBadPosition = cb['position']
    loci = bio.generateLociFromText(positionList, separator=':', applyOffset=True, errorCallback=onBadPosition)
    loci = bio.generateLiftOverLoci(ucscBuildUser, ucscBuildDB, loci, errorCallback=onBadPosition)
    bio.intersectInputLoci('main', loci, errorCallback=onBadPosition)
# positions listed in map files: same pipeline, different parser
for positionFileList in (options.position_file or empty):
    onBadPosition = cb['position']
    loci = bio.generateLociFromMapFiles(positionFileList, applyOffset=True, errorCallback=onBadPosition)
    loci = bio.generateLiftOverLoci(ucscBuildUser, ucscBuildDB, loci, errorCallback=onBadPosition)
    bio.intersectInputLoci('main', loci, errorCallback=onBadPosition)
# genes named directly (':'-separated text), interpreted with the gene identifier type option
for geneList in (options.gene or empty):
    onBadGene = cb['gene']
    geneNames = bio.generateNamesFromText(geneList, options.gene_identifier_type, separator=':', errorCallback=onBadGene)
    bio.intersectInputGenes('main', geneNames, errorCallback=onBadGene)
# genes listed in files
for geneFileList in (options.gene_file or empty):
    onBadGene = cb['gene']
    geneNames = bio.generateNamesFromNameFiles(geneFileList, options.gene_identifier_type, errorCallback=onBadGene)
    bio.intersectInputGenes('main', geneNames, errorCallback=onBadGene)
# gene searches: each search string is encoded and passed as a pair of identical values
for geneSearch in (options.gene_search or empty):
    searchPairs = ((needle, needle) for needle in map(encodeString, geneSearch))
    bio.intersectInputGeneSearch('main', searchPairs)
# regions given as text: parse, pass through generateLiftOverRegions with the
# user and database UCSC builds, then intersect
for regionList in (options.region or empty):
    onBadRegion = cb['region']
    regions = bio.generateRegionsFromText(regionList, separator=':', applyOffset=True, errorCallback=onBadRegion)
    regions = bio.generateLiftOverRegions(ucscBuildUser, ucscBuildDB, regions, errorCallback=onBadRegion)
    bio.intersectInputRegions('main', regions, errorCallback=onBadRegion)
# regions listed in files: same pipeline, different parser
for regionFileList in (options.region_file or empty):
    onBadRegion = cb['region']
    regions = bio.generateRegionsFromFiles(regionFileList, applyOffset=True, errorCallback=onBadRegion)
    regions = bio.generateLiftOverRegions(ucscBuildUser, ucscBuildDB, regions, errorCallback=onBadRegion)
    bio.intersectInputRegions('main', regions, errorCallback=onBadRegion)
# groups named directly (':'-separated text), interpreted with the group identifier type option
for groupList in (options.group or empty):
    onBadGroup = cb['group']
    groupNames = bio.generateNamesFromText(groupList, options.group_identifier_type, separator=':', errorCallback=onBadGroup)
    bio.intersectInputGroups('main', groupNames, errorCallback=onBadGroup)
# groups listed in files
for groupFileList in (options.group_file or empty):
    onBadGroup = cb['group']
    groupNames = bio.generateNamesFromNameFiles(groupFileList, options.group_identifier_type, errorCallback=onBadGroup)
    bio.intersectInputGroups('main', groupNames, errorCallback=onBadGroup)
# group searches: each search string is encoded and passed as a pair of identical values
for groupSearch in (options.group_search or empty):
    searchPairs = ((needle, needle) for needle in map(encodeString, groupSearch))
    bio.intersectInputGroupSearch('main', searchPairs)
# sources named directly: each option occurrence is passed through as given
for sourceList in (options.source or empty):
    onBadSource = cb['source']
    bio.intersectInputSources('main', sourceList, errorCallback=onBadSource)
# sources listed in files: every whitespace-separated token is one source name
for sourceFile in itertools.chain(*(options.source_file or empty)):
    # read the names up front so the file is closed promptly; the previous
    # itertools.chain(*lines) flattened each line into single characters
    # (iterating a str yields its characters) and never closed the file
    with open(sourceFile,'r') as sourceHandle:
        sourceNames = [name for line in sourceHandle for name in line.split()]
    bio.intersectInputSources(
        'main',
        sourceNames,
        errorCallback=cb['source']
    )
# apply alternate filters
# alternate SNPs given directly (':'-separated text)
for snpList in (options.alt_snp or empty):
    onBadAltSNP = cb['alt-SNP']
    rsIDs = bio.generateRSesFromText(snpList, separator=':', errorCallback=onBadAltSNP)
    bio.intersectInputSNPs('alt', rsIDs, errorCallback=onBadAltSNP)
# alternate SNPs listed in files
for snpFileList in (options.alt_snp_file or empty):
    onBadAltSNP = cb['alt-SNP']
    rsIDs = bio.generateRSesFromRSFiles(snpFileList, errorCallback=onBadAltSNP)
    bio.intersectInputSNPs('alt', rsIDs, errorCallback=onBadAltSNP)
# alternate positions given as text: parse, pass through generateLiftOverLoci
# with the user and database UCSC builds, then intersect
for positionList in (options.alt_position or empty):
    onBadAltPosition = cb['alt-position']
    loci = bio.generateLociFromText(positionList, separator=':', applyOffset=True, errorCallback=onBadAltPosition)
    loci = bio.generateLiftOverLoci(ucscBuildUser, ucscBuildDB, loci, errorCallback=onBadAltPosition)
    bio.intersectInputLoci('alt', loci, errorCallback=onBadAltPosition)
# alternate positions listed in map files: same pipeline, different parser
for positionFileList in (options.alt_position_file or empty):
    onBadAltPosition = cb['alt-position']
    loci = bio.generateLociFromMapFiles(positionFileList, applyOffset=True, errorCallback=onBadAltPosition)
    loci = bio.generateLiftOverLoci(ucscBuildUser, ucscBuildDB, loci, errorCallback=onBadAltPosition)
    bio.intersectInputLoci('alt', loci, errorCallback=onBadAltPosition)
# alternate genes named directly (':'-separated text), interpreted with the gene identifier type option
for geneList in (options.alt_gene or empty):
    onBadAltGene = cb['alt-gene']
    geneNames = bio.generateNamesFromText(geneList, options.gene_identifier_type, separator=':', errorCallback=onBadAltGene)
    bio.intersectInputGenes('alt', geneNames, errorCallback=onBadAltGene)
for geneFileList in (options.alt_gene_file or empty):
bio.intersectInputGenes(
'alt',
bio.generateNamesFromNameFiles(geneFileList, options.gene_identifier_type, errorCallback=cb['alt-gene']),
errorCallback=cb['alt-gene']