-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathqueries.py
315 lines (285 loc) · 10.5 KB
/
queries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
import json
import sqlite3
import sys
from collections import defaultdict

import requests
from flask import session, request, Response, abort, flash, redirect, url_for
from json2html import json2html

import API
from config import Config
from lib.esQuery import indexquery
from lib.style import generate_headers
def connect_to_db(path_to_db):
    """Open the SQLite phenotype database and return two cursors on it.

    A single connection is opened with ``check_same_thread=False`` and both
    cursors are created from that one connection. Going by the original
    comments, the first cursor is intended for PHENBASE queries and the
    second for ICD10BASE queries.

    Returns:
        A ``(phenbase_cursor, icd10_cursor)`` tuple.
    """
    connection = sqlite3.connect(path_to_db, check_same_thread=False)
    phenbase_cursor, icd10_cursor = connection.cursor(), connection.cursor()
    return phenbase_cursor, icd10_cursor
def doc2hpo(doc2hpo_notes):
    """Extract HPO terms from free-text clinical notes via the Doc2Hpo service.

    The notes are posted to ``Config.doc2hpo_url``; if that request does not
    return a 2xx status, the public Columbia endpoint is tried once. If both
    attempts fail, a message is flashed and a redirect to the ``phencards``
    endpoint is returned instead of the usual tuple.

    Args:
        doc2hpo_notes: Clinical note text. When empty/falsy,
            ``Config.doc2hpo_default`` is used instead.

    Returns:
        On success, a 5-tuple:
          * list of HPO IDs found non-negated and never negated,
          * list of HPO names found non-negated and never negated,
          * dict mapping HPO ID -> HPO name for every non-negated hit,
          * the raw ``hmName2Id`` hit list from the service,
          * the note text that was actually submitted.
        On failure, the Flask redirect response.
    """
    # Fall back to the configured sample text when no notes were supplied.
    if not doc2hpo_notes:
        doc2hpo_notes = Config.doc2hpo_default

    # data to be sent to api
    data = {
        "note": doc2hpo_notes,
        "negex": True,  # default true for now
    }

    def _is_success(response):
        # Any 2xx status counts as success; anything else means doc2hpo is unhealthy.
        return 200 <= response.status_code < 300

    r = requests.post(url=Config.doc2hpo_url, json=data)
    print("hi", r.status_code, file=sys.stderr)
    if not _is_success(r):
        # Primary endpoint failed: retry once against the public instance.
        r = requests.post(url='https://impact2.dbmi.columbia.edu/doc2hpo/parse/acdat', json=data)
        if not _is_success(r):
            doc2hpo_error = "Doc2Hpo service is temporarily unavailable and cannot process clinical notes. Please manually input HPO terms instead."
            flash(doc2hpo_error)
            # redirect/url_for come from flask; they were previously used
            # here without being imported, which raised NameError.
            return redirect(url_for('phencards'))

    res = r.json()
    print("results", res, file=sys.stderr)
    res = res["hmName2Id"]  # where hpo term result is grabbed

    HPO_results = {}
    affirmed_ids, affirmed_names = set(), set()
    negated_ids, negated_names = set(), set()
    for hit in res:
        if hit["negated"]:
            negated_ids.add(hit["hpoId"])
            negated_names.add(hit["hpoName"])
        else:
            affirmed_ids.add(hit["hpoId"])
            affirmed_names.add(hit["hpoName"])
            HPO_results[hit["hpoId"]] = hit["hpoName"]

    # only use non-negated HPO IDs: a term that is negated anywhere in the
    # note is dropped even if it also appears affirmed elsewhere.
    HPO_list = list(affirmed_ids - negated_ids)
    HPO_names = list(affirmed_names - negated_names)
    return HPO_list, HPO_names, HPO_results, res, doc2hpo_notes
def elasticquery(HPOquery, index, esettings="standard"):
    """Run a scored ``bool``/``should`` search for ``HPOquery`` on one index.

    Args:
        HPOquery: The text to search for.
        index: Name of the index passed through to ``indexquery``.
        esettings: Selects which clauses are sent:
            * ``"diseases"`` - non-fuzzy match on "Linked HPO term" only;
            * ``"hpolink"`` - fuzzy and non-fuzzy matches on both "NAME"
              and "Linked HPO term";
            * anything else - fuzzy and non-fuzzy match on "NAME" plus a
              phrase match on "NAMEEXACT".

    Returns:
        ``{'result': hits}`` where ``hits`` is the ``['hits']['hits']`` list
        of the ``indexquery`` response (requested with ``size=500``, sorted
        by descending ``_score``).
    """

    def fuzzy_clause(field):
        # Fuzzy OR-match on a field, lowest boost.
        return {
            "match": {
                field: {
                    "query": HPOquery,
                    "fuzziness": "AUTO:0,3",
                    "prefix_length": 0,
                    "max_expansions": 50,
                    "boost": 1,
                    "operator": "or",
                }
            }
        }

    def strict_clause(field, boost):
        # Zero-fuzziness match on a field with the given boost.
        return {
            "match": {
                field: {
                    "query": HPOquery,
                    "fuzziness": 0,
                    "boost": boost,
                }
            }
        }

    if esettings == "hpolink":
        # linked terms + disease names
        should = [
            fuzzy_clause("NAME"),
            fuzzy_clause("Linked HPO term"),
            strict_clause("NAME", 2),
            strict_clause("Linked HPO term", 3),
        ]
    elif esettings == "diseases":
        # linked terms only
        should = [strict_clause("Linked HPO term", 2)]
    else:
        # default query
        should = [
            fuzzy_clause("NAME"),
            strict_clause("NAME", 2),
            {
                "match_phrase": {
                    "NAMEEXACT": {
                        "query": HPOquery,
                        "boost": 3,
                    }
                }
            },
        ]

    query_json = {
        'query': {"bool": {"should": should}},
        "sort": {"_score": {"order": "desc"}},
    }
    # list of results line by line in "_source"
    hits = indexquery(query_json, index=index, size=500)['hits']['hits']
    return {'result': hits}
def results_page(HPOquery):
    """Collect every result section shown for a phenotype search term.

    Queries each Elasticsearch index through ``elasticquery``, calls the
    external-API helpers in ``API``, and attaches the matching entry from
    ``generate_headers()`` to each section under the ``'header'`` key.

    Side effects on the Flask ``session``:
        * ``session['HPOID']`` - "HPO ID" of the first ``hpo`` hit, or ``""``
          when there are no hits (used as input for Phen2Gene);
        * ``session['HPOquery']`` - the query with ``_`` and spaces replaced
          by ``+``.

    Returns:
        The 15-tuple ``(doid, msh, icd10, irs990, open990f, open990g, umls,
        hpo, hpolink, ohdsi, phen2gene, pharos, cohd, nihfoa, nihreporter)``,
        each a dict with ``'result'`` and ``'header'`` keys.
    """
    # indices: doid, msh, icd10, irs990, open990f, open990g, umls, hpo, hpolink, ohdsi
    headers = generate_headers()

    hpo = elasticquery(HPOquery, 'hpo')
    hpo['header'] = headers['HPO']

    hpolink = elasticquery(HPOquery, 'hpolink', esettings="hpolink")
    diseases = defaultdict(dict)
    scores = defaultdict(float)
    if hpolink["result"]:
        # Keep only the best-scoring hit per "Related Database ID".
        # Example hit:
        # {'_index': 'hpolink', '_type': '_doc', '_id': '8dws1HQBR4YMZpofFYC7',
        #  '_score': 150.43999,
        #  '_source': {'Related Database ID': '260150', 'Database Name': 'OMIM',
        #              'NAME': 'PALANT CLEFT PALATE SYNDROME',
        #              'NAMEEXACT': 'PALANT CLEFT PALATE SYNDROME',
        #              'Linked HPO ID': 'HP:0000175',
        #              'Linked HPO term': 'Cleft palate'}}
        for hit in hpolink["result"]:
            source = hit["_source"]
            did = source["Related Database ID"]
            hit_score = hit["_score"]
            if scores[did] < hit_score:
                source["score"] = hit_score
                diseases[did] = source
                scores[did] = hit_score
        hpolink["result"] = diseases
    hpolink['header'] = headers['HPOlink']

    # Remaining indices all use the default query; run them in a fixed order.
    plain_sections = {}
    for index_name, header_key in (
        ('doid', 'DO'),
        ('msh', 'MeSH'),
        ('icd10', 'ICD-10'),
        ('umls', 'UMLS'),
        ('ohdsi', 'OHDSI'),
        ('open990f', '990F'),
        ('open990g', '990G'),
        ('irs990', 'IRS'),
    ):
        section = elasticquery(HPOquery, index_name)
        section['header'] = headers[header_key]
        plain_sections[index_name] = section

    # for top HPO hit for Phen2Gene
    try:
        top_hpo_id = hpo['result'][0]['_source']['HPO ID']
    except IndexError:
        top_hpo_id = ""
    session['HPOID'] = top_hpo_id

    # only allow internal redirect to results page
    # <wiki link> https://en.wikipedia.org/wiki/Waterhouse%E2%80%93Friderichsen_syndrome
    # <ICD-10 ID link> https://www.icd10data.com/search?s=A391&codebook=icd10all
    # <OMIM ID link> https://www.omim.org/search/?index=entry&start=1&limit=10&sort=score+desc%2C+prefix_sort+desc&search=248340
    # <HPO ID link> https://hpo.jax.org/app/browse/search?q=HP:0000377&navFilter=all
    # <HPO string> https://hpo.jax.org/app/browse/search?q=DiGeorge%20syndrome&navFilter=all
    cohd = {'result': API.generate_cohd_list(HPOquery), 'header': headers['COHD']}
    nihfoa = {'result': API.generate_nihfoa_list(HPOquery), 'header': headers['NIHFOA']}
    nihreporter = {'result': API.generate_nihreporter_list(HPOquery), 'header': headers['NIHREPORT']}
    phen2gene = {'result': API.phen2gene_page(session['HPOID'], patient=False), 'header': headers['P2G']}
    pharos = {'result': API.pharos_targets(HPOquery), 'header': headers['PharosTargets']}

    session['HPOquery'] = HPOquery.replace("_", "+").replace(" ", "+")

    return (
        plain_sections['doid'],
        plain_sections['msh'],
        plain_sections['icd10'],
        plain_sections['irs990'],
        plain_sections['open990f'],
        plain_sections['open990g'],
        plain_sections['umls'],
        hpo,
        hpolink,
        plain_sections['ohdsi'],
        phen2gene,
        pharos,
        cohd,
        nihfoa,
        nihreporter,
    )
def get_results_json():
    """Return the search results for the ``HPO_list`` query argument as JSON.

    Reads ``HPO_list`` from the request's query string and passes it to
    ``results_page``. The response is a JSON object with two keys:

    * ``"results"`` - a mapping of section name to that section's dict, in
      the order ``results_page`` returns them; or the string
      ``"No HPO IDs provided"`` when the argument is missing or empty.
    * ``"errors"`` - a list of error messages (empty on success).

    Returns:
        The JSON document as a ``str``.
    """

    def _serialize_set(obj):
        # json cannot encode sets; emit them as lists.
        if isinstance(obj, set):
            return list(obj)
        raise TypeError(
            "Object of type %s is not JSON serializable" % type(obj).__name__
        )

    # Names for the positional sections returned by results_page, same order.
    section_names = (
        "doid", "msh", "icd10", "irs990", "open990f", "open990g", "umls",
        "hpo", "hpolink", "ohdsi", "phen2gene", "pharos", "cohd", "nihfoa",
        "nihreporter",
    )

    errors = []
    # get arguments from request
    HPO_list = request.args.get('HPO_list')
    if not HPO_list:  # no HPO IDs provided as argument to API
        results = "No HPO IDs provided"
        errors.append(results)
    else:
        # results_page returns a tuple of dicts (not a JSON string), so it is
        # labelled here rather than parsed. The caller's query is used as-is.
        results = dict(zip(section_names, results_page(HPO_list)))

    return json.dumps(
        {
            "results": results,
            "errors": errors,
        },
        default=_serialize_set,
    )
def hpo_from_phenopacket():
    """Extract HPO IDs from a posted phenopacket and return their results.

    The request body must be JSON containing a ``"phenopacket"`` object with
    a ``"phenotypic_features"`` (or ``"phenotypicFeatures"``) list. Each
    feature's ``["type"]["id"]`` is collected, and the IDs are joined with
    ``;`` and handed to ``get_results``.

    The body may be a JSON object, or a JSON-encoded string that itself
    contains the JSON object (the only form the previous implementation
    could parse).

    Returns:
        Whatever ``get_results(hpo_list, weight_model='s')`` returns.

    Raises:
        400 via ``abort`` when the payload is malformed, ``"phenopacket"`` or
        the feature list is missing, or no feature carries a usable ID.
    """
    # transform json format to dict
    data = request.get_json(force=True)
    if isinstance(data, str):
        try:
            data = json.loads(data)
        except ValueError:
            abort(400, 'Invalid JSON payload!')

    # TypeError covers payloads that are not mappings at all (list, number, ...).
    try:
        phenopacket = data['phenopacket']
    except (KeyError, TypeError):
        abort(400, '"phenopacket" not found!')

    # Accept both snake_case and camelCase spellings of the feature list.
    for key in ('phenotypic_features', 'phenotypicFeatures'):
        try:
            phenotypes = phenopacket[key]
        except (KeyError, TypeError):
            continue
        break
    else:
        abort(400, '"phenotypicFeatures" not found!')

    if not isinstance(phenotypes, (list, tuple)):
        # Nothing iterable to read features from; reported as "none found" below.
        phenotypes = []

    hpo_ids = []
    for item in phenotypes:
        try:
            hpo_id = item['type']['id']
        except (KeyError, TypeError):
            # Feature without a type/id: skip it.
            continue
        if isinstance(hpo_id, str) and hpo_id:
            hpo_ids.append(hpo_id)

    if not hpo_ids:
        abort(400, 'No phenotypic features found!')

    hpo_list = ';'.join(hpo_ids)
    # NOTE(review): get_results is not defined or imported in the visible
    # part of this file - confirm where it is expected to come from.
    results = get_results(hpo_list, weight_model='s')
    return results