-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathWF5.py
151 lines (132 loc) · 5.9 KB
/
WF5.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import os
import sys
import argparse
# PyCharm doesn't play well with relative imports + python console + terminal
try:
from code.reasoningtool import ReasoningUtilities as RU
except ImportError:
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import ReasoningUtilities as RU
import FormatOutput
import CustomExceptions
# eg: what proteins does drug X target? One hop question
class WF5:
    """Question handler: which nodes of a given label are linked to a source node.

    The graph is obtained through ``ReasoningUtilities`` (``RU``) and results
    are optionally wrapped in a ``FormatOutput.FormatResponse``.
    """

    def __init__(self):
        # The handler keeps no per-instance state.
        pass

    @staticmethod
    def _error_response(error_code, error_message):
        """Build a Q3 ``FormatResponse`` carrying a single error message."""
        response = FormatOutput.FormatResponse(3)
        response.add_error_message(error_code, error_message)
        return response

    def answer(self, source_name, target_label, relationship_list, use_json=False, directed=False):
        """
        Find paths of the type:
        (n:chemical_substance{id:"CHEMBL.COMPOUND:CHEMBL714"})-[:physically_interacts_with]-(:protein)-[:has_phenotype]-(m:phenotypic_feature)

        :param source_name: value compared against each node's ``properties['id']``
            to locate the source node.
        :param target_label: label of the nodes to reach; passed through to
            ``RU.return_subgraph_paths_of_type``.
        :param relationship_list: list of relationship type strings; passed to the
            subgraph query and comma-joined into result/error text.
        :param use_json: when False return a plain list of result dicts, when True
            return a populated ``FormatOutput.FormatResponse``.
        :param directed: forwarded to ``RU.return_subgraph_paths_of_type``.
        :return: a list of ``{'type', 'name', 'desc', 'prob'}`` dicts when
            ``use_json`` is False; otherwise a ``FormatResponse``. A
            ``FormatResponse`` holding an error message is returned (regardless of
            ``use_json``) when the query raises ``CustomExceptions.EmptyCypherError``,
            and in JSON mode when no node with id ``source_name`` is in the graph.
        """
        # Get label/kind of node the source is
        source_label = RU.get_node_property(source_name, "label")

        # Get the subgraph (all targets along relationship)
        try:
            g = RU.return_subgraph_paths_of_type(source_name, source_label, None, target_label,
                                                 relationship_list, directed=directed)
        except CustomExceptions.EmptyCypherError:
            error_message = "No path between %s and %s via relationships %s" % (
                source_name, target_label, ','.join(relationship_list))
            return self._error_response("NoPathsFound", error_message)

        # Snapshot (node, data) pairs once. Looking data up through this dict
        # instead of the ``g.node`` attribute keeps the code working on NetworkX
        # releases that no longer provide that attribute.
        node_items = list(g.nodes(data=True))
        node_data = dict(node_items)

        # The first node whose id matches is the source; every node with a
        # different id is reported as a target.
        # NOTE(review): this includes any intermediate nodes on multi-hop paths,
        # not only nodes carrying target_label -- confirm this is intended.
        source_node_number = None
        target_numbers = []
        for node, data in node_items:
            if data['properties']['id'] == source_name:
                if source_node_number is None:
                    source_node_number = node
            else:
                target_numbers.append(node)

        # Format the results.
        if not use_json:
            return [
                {'type': list(set(node_data[target_number]['labels']) - {'Base'}).pop(),
                 'name': node_data[target_number]['properties']['name'],
                 'desc': node_data[target_number]['properties']['name'],
                 'prob': 1}  # All these are known to be true
                for target_number in target_numbers
            ]

        # You want the standardized API output format; this needs the source node.
        if source_node_number is None:
            return self._error_response(
                "SourceNotFound",
                "Source node %s was not found in the returned subgraph" % source_name)

        response = FormatOutput.FormatResponse(3)  # it's a Q3 question
        response.response.table_column_names = ["source name", "source ID", "target name", "target ID"]
        source_properties = node_data[source_node_number]['properties']
        source_description = source_properties['name']
        relationships_text = ','.join(relationship_list)
        for target_number in target_numbers:
            target_properties = node_data[target_number]['properties']
            target_description = target_properties['name']
            # Each result carries only the source/target pair and the edges between them.
            subgraph = g.subgraph([source_node_number, target_number])
            res = response.add_subgraph(subgraph.nodes(data=True), subgraph.edges(data=True),
                                        "%s and %s are connected by the relationships %s" % (
                                            source_description, target_description, relationships_text),
                                        1, return_result=True)
            res.essence = "%s" % target_description  # populate with essence of question result
            res.row_data = [
                "%s" % source_description,
                "%s" % source_properties['id'],
                "%s" % target_description,
                "%s" % target_properties['id'],
            ]
        return response

    def describe(self):
        """Return a help text listing the node labels, node count and relationship
        types reported by ``RU``."""
        parts = [
            "Answers questions of the form: 'What proteins does tranilast target?' and 'What genes are affected by "
            "Fanconi anemia?'" + "\n",
            "You can ask: 'What X does Y Z?' where X is one of the following: \n",
        ]
        parts.extend(label + "\n" for label in RU.get_node_labels())
        parts.append("\n The term Y is any of the nodes that are in our graph (currently "
                     + str(RU.count_nodes()) + " nodes in total). \n")
        parts.append("\n The term Z is any relationship of the following kind: \n")
        for rel in RU.get_relationship_types():
            # Show "has_phenotype" as "has phenotype " (each word followed by a space).
            parts.append("".join(term + " " for term in rel.split("_")) + "\n")
        parts.append("Assumes that Z directly connects X and Y.")
        return "".join(parts)
def main():
    """Command-line entry point: parse arguments, then describe or answer.

    With ``--describe`` the help text from ``WF5.describe`` is printed.
    Otherwise ``WF5.answer`` is called; a list result has each item's ``desc``
    printed, while any other result object is printed via its own ``print()``
    method (this covers ``--json`` and error responses returned in plain mode).
    """
    parser = argparse.ArgumentParser(description="Answers questions of the type 'What proteins does X target?'.",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-s', '--source_name', type=str, help="Source node name.", default="CHEMBL.COMPOUND:CHEMBL714")
    parser.add_argument('-t', '--target_label', type=str, help="Target node label", default="phenotypic_feature")
    parser.add_argument('-r', '--rel_type_list', type=str, help="Relationship type.", default="physically_interacts_with,has_phenotype")
    parser.add_argument('-j', '--json', action='store_true', help='Flag specifying that results should be printed in JSON format (to stdout)', default=False)
    parser.add_argument('-d', '--describe', action='store_true', help="Describe what kinds of questions this answers.", default=False)
    parser.add_argument('--directed', action='store_true', help="Treat the relationship as directed", default=False)

    # Parse and check args
    args = parser.parse_args()

    # Initialize the question class
    Q = WF5()

    if args.describe:
        print(Q.describe())
        return

    # The relationship types arrive as one comma-separated string.
    relationship_list = args.rel_type_list.split(',')
    res = Q.answer(args.source_name, args.target_label, relationship_list, args.json, directed=args.directed)

    # answer() returns a response object instead of a list on its error path
    # even in plain mode, so only iterate when we really got a list.
    if args.json or not isinstance(res, list):
        res.print()
    else:
        for item in res:
            print(item['desc'])


if __name__ == "__main__":
    main()