-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathCOHDUtilities.py
91 lines (75 loc) · 3.01 KB
/
COHDUtilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# A collection of utilities for interacting with Columbia Open Health Data (COHD)
import os
import sys
import argparse
import math
# PyCharm doesn't play well with relative imports + python console + terminal
try:
from code.reasoningtool import ReasoningUtilities as RU
except ImportError:
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import ReasoningUtilities as RU
import FormatOutput
import networkx as nx
try:
from QueryCOHD import QueryCOHD
except ImportError:
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from QueryCOHD import QueryCOHD
import CustomExceptions
class COHDUtilities:
    """Helpers that combine several QueryCOHD calls into one aggregated answer."""

    def __init__(self):
        # No instance state is needed: the helpers on this class are static methods.
        pass

    @staticmethod
    def get_conditions_treating(drug_description, conservative=False):
        """
        Get all the conditions that are associated with a drug.

        :param drug_description: string (eg. 'Naproxen')
        :param conservative: bool (True = only use COHD concepts whose name matches
            drug_description exactly, ignoring case; False = use every concept
            returned by COHD for the description)
        :return: dictionary of dictionaries keyed by associated concept ID, eg.
            {134736: {'associated_concept_id': 134736,
                      'associated_concept_name': 'Backache',
                      'concept_count': 112,
                      'concept_frequency': 2.101665438505926e-05,
                      'concept_id': 1115008}}
            'concept_count' is summed over all matched drug concepts and
            'concept_frequency' is that count divided by the sum of all counts in
            the result. The remaining keys come from the first record seen for
            each condition. An empty dictionary is returned when nothing matches.
        """
        # Map the drug description onto one or more COHD concept IDs
        drug_concepts = QueryCOHD.find_concept_ids(drug_description)
        if conservative:
            wanted_name = drug_description.lower()
            drug_ids = [concept['concept_id'] for concept in drug_concepts
                        if concept['concept_name'].lower() == wanted_name]
        else:
            drug_ids = [concept['concept_id'] for concept in drug_concepts]

        # Collect the condition records associated with every selected drug concept
        associated_concepts = []
        for drug_id in drug_ids:
            associated_concepts += QueryCOHD.get_associated_concept_domain_freq(str(drug_id), "Condition")
        # NOTE(review): looks like leftover diagnostic output; kept so stdout is unchanged
        print(len(associated_concepts))

        return COHDUtilities._merge_condition_counts(associated_concepts)

    @staticmethod
    def _merge_condition_counts(associated_concepts):
        """
        Merge association records that refer to the same condition.

        :param associated_concepts: iterable of dictionaries, each with at least the
            keys 'associated_concept_id' and 'concept_count'
        :return: dictionary mapping associated concept ID to a merged record whose
            'concept_count' is the sum over all records for that condition and whose
            'concept_frequency' is that count divided by the total of all counts
            (0.0 when the total is zero)
        """
        merged = {}
        for record in associated_concepts:
            condition_id = record['associated_concept_id']
            if condition_id in merged:
                merged[condition_id]['concept_count'] += record['concept_count']
            else:
                # Copy so that summing counts and rewriting the frequency below never
                # alters the dictionaries handed back by QueryCOHD
                merged[condition_id] = dict(record)

        # Re-express each frequency relative to the total of the merged counts
        total_count = sum(entry['concept_count'] for entry in merged.values())
        for entry in merged.values():
            if total_count:
                entry['concept_frequency'] = entry['concept_count'] / float(total_count)
            else:
                # Every count is zero: avoid dividing by zero
                entry['concept_frequency'] = 0.0
        return merged
if __name__ == "__main__":
    # Manual smoke test: look up the same drug with exact-name matching first,
    # then with every synonym COHD returns, separated by a blank gap.
    utilities = COHDUtilities()
    exact_match_result = utilities.get_conditions_treating('Naproxen', conservative=True)
    print(exact_match_result)
    print("\n")
    all_synonyms_result = utilities.get_conditions_treating('Naproxen', conservative=False)
    print(all_synonyms_result)