-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtest_model_predictions.py
118 lines (108 loc) · 4.23 KB
/
test_model_predictions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
""" This file contains the testing script for the BLRs to calculate the predictive metrics, calibration and net benefit
Author:
Claudio Fanconi
"""
import os
import pandas as pd
import numpy as np
from src.utils.config import config
from src.metrics import nr_variables_used, print_results
from src.plots import calibration_plot, kaplan_meyer_plot, net_benefit_plot
# Display name of each evaluated model -> file-name prefix of its saved predictions.
# Insertion order is the order in which the models are reported.
_PREDICTION_FILE_PREFIXES = {
    "Tabular LASSO": "tabular_model",
    "Language LASSO": "language_model",
    "Fusion LASSO": "fusion_model",
    "Language BERT": "language_bert",
    "Fusion BERT": "fusion_bert",
}


def _load_predictions(directory: str, prefix: str, label_type: str) -> np.ndarray:
    """Load ``arr_0`` from ``<directory>/<prefix>_predictions_<label_type>.npz``.

    Args:
        directory (str): folder that holds the saved prediction archives
        prefix (str): model-specific file-name prefix, e.g. ``"tabular_model"``
        label_type (str): name of the outcome the predictions were made for
    Returns:
        np.ndarray: the array stored under the key ``arr_0``
    """
    path = os.path.join(directory, f"{prefix}_predictions_{label_type}.npz")
    # NOTE(review): allow_pickle=True lets a crafted archive execute arbitrary
    # code while loading. Keep it only as long as these files are produced by
    # our own pipeline; drop it if the arrays are plain numeric arrays.
    # The context manager closes the underlying zip file handle once the
    # array has been read into memory.
    with np.load(path, allow_pickle=True) as archive:
        return archive["arr_0"]


def main(random_state: int = 42) -> None:
    """Main function which evaluates the saved test-set predictions of all models

    For every label type in ``config.data.label_type`` the stored predictions
    of each model are loaded; per model the number of used variables and the
    predictive results are printed and a Kaplan-Meyer plot is created. Per
    label type a calibration plot and a net benefit plot over all models are
    created.

    Args:
        random_state (int, 42): random state for reproducibility. Not used by
            the evaluation itself; kept so existing callers keep working.
    Returns:
        None
    """
    # Load relevant test data
    feature_matrix = pd.read_csv(config.data.data_path, low_memory=False).set_index(
        "PAT_DEID"
    )
    outcomes = pd.read_csv(config.data.label_path).set_index("PAT_DEID")
    labels_all = outcomes[config.data.label_type].reindex(feature_matrix.index)
    test_ids = pd.read_csv(config.data.test_ids)["PAT_DEID"]
    # NOTE(review): columns 8, 9 and 12 are parsed as dates by position;
    # confirm these indices against the layout of the info file.
    first_op_df = pd.read_csv(config.data.info_df, parse_dates=[8, 9, 12])

    # Drop the patients that do not have notes (rows with any missing feature)
    feature_matrix = feature_matrix.dropna()
    # Keep only the labelled patients that have complete features AND belong
    # to the test split.
    labels_all = labels_all.loc[
        labels_all.index.intersection(feature_matrix.index).intersection(test_ids)
    ]

    for label_type in config.data.label_type:
        y_test = labels_all[label_type]
        print(f"-------------------- Results for {label_type} ----------------------")

        # Load the predictions / predictive distributions of all models
        # up front, so a missing file fails before anything is reported.
        predictions = {
            name: _load_predictions(config.data.save_predictions, prefix, label_type)
            for name, prefix in _PREDICTION_FILE_PREFIXES.items()
        }

        # Calculate the predictive metrics
        for name, y_pred in predictions.items():
            print(name)
            print(
                nr_variables_used(
                    feature_matrix, config.data.model_path, name, label_type
                ),
            )
            print_results(y_test, y_pred)

            # Create Kaplan-Meyer risk analysis
            kaplan_meyer_plot(
                predictions=pd.DataFrame(
                    data=np.array([y_pred, y_test]).T,
                    columns=["predictions", label_type],
                    index=labels_all.index,
                ),
                acu_dates=first_op_df,
                model_name=name,
                label_type=label_type,
                save_path=config.data.figures_path,
                # Second "_"-separated token of the label name, plus one;
                # presumably the outcome horizon - TODO confirm the unit.
                right_censor=int(label_type.split("_")[1]) + 1,
            )

        # Create calibration plot
        calibration_plot(predictions, y_test, config.data.figures_path, label_type)

        # Create Net Benefit Curve
        net_benefit_plot(predictions, y_test, config.data.figures_path, label_type)
if __name__ == "__main__":
    # Script entry point: run the evaluation with the seed taken from the
    # project configuration.
    main(random_state=config.seed)