-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathpreprocessing.py
81 lines (64 loc) · 2.37 KB
/
preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import numpy as np
import keras
from keras.layers import *
from keras.models import *
from keras.optimizers import *
import keras.backend as K
from scipy import stats
from sklearn.model_selection import StratifiedKFold
def zscore_window(full_sequence, start, length):
    """Cut a fixed-length window out of one recording and z-score each row.

    Args:
        full_sequence: 2-D array; axis 1 is the axis that gets windowed.
        start: Index of the first column to keep.
        length: Number of columns to keep.

    Returns:
        ``full_sequence[:, start:start + length]`` with every row standardized
        by ``scipy.stats.zscore`` along axis 1, or ``None`` when the recording
        has fewer than ``start + length`` columns.
    """
    if full_sequence.shape[1] < start + length:
        return None
    return stats.zscore(full_sequence[:, start:start + length], axis=1)


def compute_adjacency(region_series):
    """Build a symmetric adjacency matrix from row-wise correlations.

    Args:
        region_series: 2-D array with one row per region.

    Returns:
        A square float array with one row/column per input row. Off-diagonal
        entries hold the absolute Pearson correlation between the two rows;
        the diagonal is exactly 1.
    """
    # A single corrcoef call over all rows replaces one call per pair of rows,
    # which recomputed each row's statistics over and over. Values agree with
    # the pairwise computation up to floating-point rounding.
    # atleast_2d: corrcoef collapses a single-row input to a 0-d result.
    corr = np.abs(np.atleast_2d(np.corrcoef(region_series)))
    # Mirror the strict upper triangle so the result is exactly symmetric.
    upper = np.triu(corr, k=1)
    adjacency = upper + upper.T
    np.fill_diagonal(adjacency, 1.0)
    return adjacency


def main():
    """Preprocess the per-subject arrays and write the adjacency matrix and folds.

    Reads ``demo.txt``: column 0 is formatted as a zero-padded six-digit
    integer that names the subject's ``*_cortex.npy`` file, column 1 is stored
    as the subject's label. Subjects whose array has fewer than ``S + L``
    columns are skipped.

    Writes, under ``data/``:
        * ``adj_matrix.npy`` -- absolute correlation between the rows of all
          kept, z-scored windows concatenated along axis 1;
        * ``train_data_k.npy``, ``train_label_k.npy``, ``test_data_k.npy``,
          ``test_label_k.npy`` for each of the 5 stratified folds (k = 1..5).

    Raises:
        ValueError: If no subject has at least ``S + L`` columns.
    """
    # Local import: only the script entry point needs it.
    import os

    demo = np.loadtxt('demo.txt')
    L = 1000  # number of columns kept per subject
    S = 0  # first column of the kept window
    n_regions = 22

    data = np.zeros((demo.shape[0], 1, L, n_regions, 1))
    label = np.zeros((demo.shape[0],))

    # load all data
    windows = []
    for row in demo:
        subject_string = format(int(row[0]), '06d')
        filename_full = ('/fs/neurosci01/qingyuz/hcp/hcp_tc_npy_22/'
                         + subject_string + '_cortex.npy')
        z_sequence = zscore_window(np.load(filename_full), S, L)
        if z_sequence is None:
            continue
        idx = len(windows)
        data[idx, 0, :, :, 0] = np.transpose(z_sequence)
        label[idx] = row[1]
        windows.append(z_sequence)

    if not windows:
        raise ValueError(
            'no subject in demo.txt has at least %d columns; nothing to save'
            % (S + L))

    # Concatenate once at the end; growing the array inside the loop re-copied
    # everything loaded so far on every iteration.
    data_all = np.concatenate(windows, axis=1)
    n_kept = len(windows)
    del windows

    # np.save does not create missing directories.
    os.makedirs('data', exist_ok=True)

    # compute adj matrix
    np.save('data/adj_matrix.npy', compute_adjacency(data_all))

    # split train/test and save data
    data = data[:n_kept]
    label = label[:n_kept]
    print(data.shape)

    # NOTE: shuffle=True without random_state gives different folds per run.
    skf = StratifiedKFold(n_splits=5, shuffle=True)
    for fold, (train_idx, test_idx) in enumerate(skf.split(data, label), start=1):
        outputs = (
            ('train_data', data[train_idx]),
            ('train_label', label[train_idx]),
            ('test_data', data[test_idx]),
            ('test_label', label[test_idx]),
        )
        for name, array in outputs:
            np.save('data/' + name + '_' + str(fold) + '.npy', array)


if __name__ == "__main__":
    main()