-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathFindDrugsRX.py
101 lines (86 loc) · 5.26 KB
/
FindDrugsRX.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import time
from ElsevierAPI.ResnetAPI.PathwayStudioGOQL import OQL
from ElsevierAPI import open_api_session,load_api_config
import ElsevierAPI.ReaxysAPI.Reaxys_API as RxAPI
import argparse
import textwrap
from ElsevierAPI.ResnetAPI.NetworkxObjects import PSObject
from ElsevierAPI.ResnetAPI.ResnetGraph import ResnetGraph
# Wall-clock start of the run; handed to ps_api.execution_time() by the script's final report line.
start_time = time.time()
def GOQLtoFindDrugs(TargetIds:list, TargetType = 'Protein', drugEffect=('negative',)):
    """Build the OQL query used to find drugs acting on the given targets.

    Args:
        TargetIds: identifiers of the target entities. They are passed to
            OQL.drugs4() as ``for_targets_with_ids`` for proteins, and to
            OQL.expand_entity() as values of the 'id' property otherwise.
        TargetType: type name of the targets. 'Protein' and 'Small Molecule'
            get dedicated handling; every other value takes the generic
            'Regulation'-only path.
        drugEffect: iterable of effect names joined with commas into the
            ``Effect = (...)`` clause. Not used when TargetType is 'Protein'.

    Returns:
        The result of OQL.drugs4() for proteins; otherwise the query returned
        by OQL.expand_entity() with the Effect clause appended.
    """
    if TargetType == 'Protein':
        return OQL.drugs4(for_targets_with_ids=TargetIds)

    # Small-molecule targets are additionally expanded through 'MolSynthesis'
    # relations; all remaining target types use 'Regulation' only.
    if TargetType == 'Small Molecule':
        rel_types = ['Regulation', 'MolSynthesis']
    else:
        rel_types = ['Regulation']

    oql_query = OQL.expand_entity(
        PropertyValues=TargetIds,
        SearchByProperties=['id'],
        expand_by_rel_types=rel_types,
        expand2neighbors=['Small Molecule'],
        direction='upstream',
    )
    return oql_query + ' AND Effect = (' + ','.join(drugEffect) + ')'
if __name__ == "__main__":
    # Command-line entry point: read entity names from a file, find drugs linked
    # to them in Resnet and, if requested, annotate those drugs with Reaxys
    # properties before printing the report.

    # Local import: only this entry point needs path handling.
    import os

    # Shown as the argparse epilog; textwrap.dedent() removes the common indent.
    instructions = '''
    infile - single column file with entity names that must be modulated by drugs.
    If you want to use identifiers other than names enter appropriate Propeprty types into SearchByProperties list.
    target_type - indicates type of entities are in infile: Protein, Small Molecule, CellProcess, Disease, ClinicalParameter, etc. Only one type of entities is allowed in infile
    resnet_props - comma-separated list of property names used for searching entities from infile in Resnet. Default: Name+Alias
    effect - Find Agonists (effect=positive) or Antagonits (effect=negative). Default: negative
    reaxys_prop - comma-separated list of Reaxys properties to annotate drugs found in Resnet.
    Script finds drugs affecting entities in infile and then annotates them with Reaxys properties for output
    '''
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,epilog=textwrap.dedent(instructions))
    parser.add_argument('-i', '--infile', type=str, required=True)
    # NOTE(review): the option is required, so argparse never falls back to
    # the 'Disease' default below - confirm which of the two was intended.
    parser.add_argument('-t', '--target_type', type=str, required=True, default='Disease')
    parser.add_argument('-p', '--resnet_search_props', type=str, default='Name,Alias')
    parser.add_argument('-a', '--resnet_retreive_props', type=str)
    parser.add_argument('-e', '--effect', type=str, default='negative')
    parser.add_argument('-r', '--reaxys_prop', type=str, default='')
    parser.add_argument('--debug', action="store_true")
    args = parser.parse_args()

    TargetType = args.target_type
    SearchPSprops = args.resnet_search_props.split(',')
    drugEffect = [args.effect]
    if args.reaxys_prop:
        ReaxysFields = args.reaxys_prop.split(',')
    else:
        print ('No Reaxys properties specified!')
        ReaxysFields = []

    infile = str(args.infile)
    # Output files are named after the input file without its extension,
    # whatever the extension length is (or if there is none at all).
    out_prefix = os.path.splitext(infile)[0]

    with open(infile) as f:
        # One entity per line; blank lines carry no entity name and are skipped.
        EntitiesToExpand = [name for name in (line.rstrip('\n') for line in f) if name]

    ps_api = open_api_session()
    Targets = ps_api._props2psobj(EntitiesToExpand,SearchPSprops)
    PSdumpFile = out_prefix+'_psdump.tsv'
    ps_api.add_dump_file(PSdumpFile, replace_main_dump=True)
    # -a is optional: without it args.resnet_retreive_props is None, so the
    # entity properties already configured on the session are left as they are.
    if args.resnet_retreive_props:
        ps_api.entProps = args.resnet_retreive_props.split(',')
    ps_api.add_ent_props('Reaxys ID', 'InChIKey')
    ps_api.relProps = ['Name','Sentence','PMID','DOI','PubYear','RelationNumberOfReferences']
    #Data dump columns will be ordered according to the order in this list

    target_dbids = ResnetGraph.dbids(Targets)
    ps_api.process_oql(GOQLtoFindDrugs(target_dbids, TargetType=TargetType, drugEffect=drugEffect))

    if ReaxysFields:
        # Drugs are the small-molecule nodes that have at least one outgoing relation.
        # NOTE(review): PSObject(y) may copy the node data - confirm that the
        # Reaxys properties added below reach the graph that is printed.
        FoundDrugs = [
            PSObject(y) for x,y in ps_api.Graph.nodes(data=True)
            if ps_api.Graph.out_degree(x)>0 and y['ObjTypeName'][0] in ['Small Molecule', 'SmallMol']
        ]
        print('Found %d drugs in Resnet' % len(FoundDrugs))

        ReaxysAPI = RxAPI.Reaxys_API()
        ReaxysAPI.OpenSession(load_api_config())
        foundRxProps = 0
        print("Start looking for Reaxys properties")
        try:
            for drug in FoundDrugs:
                # Resolve both identifiers for every drug so that neither is
                # undefined nor carried over from the previous iteration.
                try:
                    inchikeys = drug['InChIKey']
                except KeyError:
                    inchikeys = None
                try:
                    RXNIds = drug['Reaxys ID']
                except KeyError:
                    RXNIds = None
                if not inchikeys and not RXNIds:
                    continue

                # InChIKey lookup first; the Reaxys ID is the fallback.
                if inchikeys:
                    ReaxysProps = ReaxysAPI.GetCompoundProps(inchikeys, 'IDE.INCHI', ReaxysFields)
                else:
                    ReaxysProps = {}
                if len(ReaxysProps) == 0 and RXNIds:
                    ReaxysProps = ReaxysAPI.GetCompoundProps(RXNIds, 'IDE.XRN', ReaxysFields)

                if len(ReaxysProps) > 0:
                    drug.update(ReaxysProps)
                    foundRxProps += 1
        finally:
            # Close the Reaxys session even when a lookup fails.
            ReaxysAPI.disconnect()
        print('Found Reaxys properties for %d out of %d Resnet drugs' % (foundRxProps, len(FoundDrugs)))

        fileRx = out_prefix + '+Rx.tsv'
        EntityProps = list(ps_api.entProps)+ReaxysFields
        ps_api.Graph.print_references(fileRx, ps_api.relProps, EntityProps)
        print("%d relations supported by %d references and annotated by Reaxys fields are in file: %s" % (ps_api.Graph.number_of_edges(),ps_api.Graph.size(weight='weight'),fileRx))

    print("Time to fetch drugs linked to %s found in %s ---" % (str(args.infile),ps_api.execution_time(start_time)))