-
Notifications
You must be signed in to change notification settings - Fork 0
/
reprocess.py
95 lines (84 loc) · 2.49 KB
/
reprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import sys
import numpy as np
import tensorflow.keras
import pandas as pd
import sklearn as sk
import tensorflow as tf
from tensorflow.keras.models import load_model
def mse_check(seq_tt, affi_tt, model, alpha=0.1):
    """Return the share of predictions outside a relative band around the target.

    Despite the name, no squared error is computed.  A sample counts as an
    error when any of its predicted values lies strictly outside the interval
    spanned by ``(1 - alpha) * target`` and ``(1 + alpha) * target``.

    Args:
        seq_tt: Inputs forwarded unchanged to ``model.predict``.
        affi_tt: Target values, one entry (or row) per predicted sample.
        model: Any object exposing ``predict(seq_tt)`` that returns an
            array-like whose first axis indexes samples.
        alpha: Relative tolerance around each target.

    Returns:
        A tuple ``(er, idx)`` where ``er`` is the fraction of flagged samples
        and ``idx`` is the list of their row indices.  ``(0.0, [])`` is
        returned when the model yields no predictions.
    """
    probas = np.asarray(model.predict(seq_tt))
    n_samples = probas.shape[0]
    if n_samples == 0:
        # Nothing was predicted: avoid dividing by zero below.
        print(0.0)
        return 0.0, []

    # Flatten trailing axes so (N,) and (N, 1) layouts compare row by row.
    preds = probas.reshape(n_samples, -1)
    targets = np.asarray(affi_tt)[:n_samples].reshape(n_samples, -1)

    bound_a = (1 - alpha) * targets
    bound_b = (1 + alpha) * targets
    # For negative targets the two bounds swap order; sorting them keeps the
    # band meaningful instead of flagging every such sample.
    lower = np.minimum(bound_a, bound_b)
    upper = np.maximum(bound_a, bound_b)

    outside = (preds > upper) | (preds < lower)
    idx = np.flatnonzero(outside.any(axis=1)).tolist()

    er = len(idx) / n_samples
    print(er)
    return er, idx
def one_hot_check(seq_tt, affi_tt, model, gate):
    """Return the share of samples whose target class is missed and weakly scored.

    For every row, the predicted class and the target class are taken as the
    last index of an ascending ``np.argsort`` of that row.  A sample is an
    error only when both conditions hold: the predicted class differs from the
    target class, and the score the model assigned to the target class is
    strictly below ``gate``.

    Args:
        seq_tt: Inputs forwarded unchanged to ``model.predict``.
        affi_tt: 2-D array-like of per-class target scores, one row per sample.
        model: Any object exposing ``predict(seq_tt)`` that returns a 2-D
            array-like of per-class scores.
        gate: Score threshold below which a missed target class is an error.

    Returns:
        A tuple ``(er, idx)`` where ``er`` is the fraction of flagged samples
        and ``idx`` is the list of their row indices.  ``(0.0, [])`` is
        returned when the model yields no predictions.
    """
    probas = np.asarray(model.predict(seq_tt))
    n_samples = probas.shape[0]
    if n_samples == 0:
        # Nothing was predicted: avoid dividing by zero below.
        print(0.0)
        return 0.0, []

    # argsort(...)[-1] rather than argmax keeps the original tie-breaking.
    target = np.argsort(np.asarray(affi_tt), axis=1)[:n_samples, -1]
    predicted = np.argsort(probas, axis=1)[:, -1]

    # Score the model gave to the class that should have won.
    target_score = probas[np.arange(n_samples), target]

    flagged = (predicted != target) & (target_score < gate)
    idx = np.flatnonzero(flagged).tolist()

    er = len(idx) / n_samples
    print(er)
    return er, idx
def bin_check(seq_tt, affi_tt, model):
    """Return the share of samples whose thresholded prediction misses the label.

    Predictions strictly greater than 0.5 become 1, everything else becomes 0.
    The result is compared for equality against ``affi_tt`` as given, so the
    labels are expected to already be 0/1 values.  A sample is an error when
    any of its outputs differs from the corresponding label.

    Args:
        seq_tt: Inputs forwarded unchanged to ``model.predict``.
        affi_tt: Labels, one entry (or row) per predicted sample.
        model: Any object exposing ``predict(seq_tt)`` that returns an
            array-like whose first axis indexes samples.

    Returns:
        A tuple ``(er, idx)`` where ``er`` is the fraction of misclassified
        samples and ``idx`` is the list of their row indices.  ``(0.0, [])``
        is returned when the model yields no predictions.
    """
    probas = np.asarray(model.predict(seq_tt))
    n_samples = probas.shape[0]
    if n_samples == 0:
        # Nothing was predicted: avoid dividing by zero below.
        print(0.0)
        return 0.0, []

    # Flatten trailing axes so (N,) and (N, 1) layouts compare row by row.
    y_hat = (probas.reshape(n_samples, -1) > 0.5).astype(float)
    labels = np.asarray(affi_tt)[:n_samples].reshape(n_samples, -1)

    mismatched = (y_hat != labels).any(axis=1)
    idx = np.flatnonzero(mismatched).tolist()

    er = len(idx) / n_samples
    print(er)
    return er, idx
def range_check(seq_tt, affi_tt, model, wide):
    """Return the share of samples that are wrong by more than a margin.

    Labels are binarised with ``affi_tt > 0.5``.  Only the first output column
    of the prediction and of the labels is inspected.  A sample is an error
    when either

    * its score is ``>= 0.5 + wide`` while the binarised label is 0, or
    * its score is ``< 0.5 - wide`` while the binarised label is 1.

    Scores inside the ``wide`` margin around 0.5 are never counted as errors.

    Args:
        seq_tt: Inputs forwarded unchanged to ``model.predict``.
        affi_tt: Raw label values, one entry (or row) per predicted sample.
        model: Any object exposing ``predict(seq_tt)`` that returns an
            array-like whose first axis indexes samples.
        wide: Half-width of the tolerated margin around the 0.5 threshold.

    Returns:
        A tuple ``(er, idx)`` where ``er`` is the fraction of flagged samples
        and ``idx`` is the list of their row indices.  ``(0.0, [])`` is
        returned when the model yields no predictions.
    """
    probas = np.asarray(model.predict(seq_tt))
    n_samples = probas.shape[0]
    if n_samples == 0:
        # Nothing was predicted: avoid dividing by zero below.
        print(0.0)
        return 0.0, []

    # Flatten trailing axes, then keep the first column only.
    scores = probas.reshape(n_samples, -1)[:, 0]
    raw_labels = np.asarray(affi_tt)[:n_samples].reshape(n_samples, -1)[:, 0]
    is_positive = raw_labels > 0.5

    # The upper bound is inclusive and the lower bound is exclusive.
    confident_false_pos = (scores >= 0.5 + wide) & ~is_positive
    confident_false_neg = (scores < 0.5 - wide) & is_positive

    idx = np.flatnonzero(confident_false_pos | confident_false_neg).tolist()

    er = len(idx) / n_samples
    print(er)
    return er, idx
def de_one_hot(seq):
    """Decode one-hot encoded sequences back into A/U/C/G strings.

    The channel order is ``A = [1, 0, 0, 0]``, ``U = [0, 1, 0, 0]``,
    ``C = [0, 0, 1, 0]`` and ``G = [0, 0, 0, 1]``.  Positions whose vector
    matches none of these four patterns contribute no character, so a decoded
    string may be shorter than the second axis of the input.

    Args:
        seq: Array-like indexed as ``[sample, position, channel]``.

    Returns:
        A list with one decoded string per sample, in input order.
    """
    alphabet = {
        (1, 0, 0, 0): 'A',
        (0, 1, 0, 0): 'U',
        (0, 0, 1, 0): 'C',
        (0, 0, 0, 1): 'G',
    }
    samples = np.asarray(seq)
    return [
        # tolist() yields plain Python scalars, so 1.0, 1 and True all hit
        # the same table key; unmatched vectors map to the empty string.
        ''.join(alphabet.get(tuple(vector.tolist()), '') for vector in sample)
        for sample in samples
    ]