-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathutils.py
69 lines (58 loc) · 2.46 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
path = "../Datasets"
from itertools import groupby
from operator import itemgetter
import pickle
import os
from sklearn.preprocessing import StandardScaler
import numpy as np
#path = "/content/drive/My Drive/M2/AFMatriciel/Datasets"
def compute_sparse_correlation_matrix(A):
scaler = StandardScaler(with_mean=False)
scaled_A = scaler.fit_transform(A) # Assuming A is a CSR or CSC matrix
corr_matrix = (1/scaled_A.shape[0]) * (scaled_A.T @ scaled_A)
return corr_matrix
def pre_processing(mat, mat_file):
# Create bu and bi indexes
# bi_index is a list with a size equal to the number of users
# the jth element is a list storing the indexes of movies rated by user j
# bu_index is the same but storing the indexes of users whose rating is
# available for a movie
# These indexes will help to the algorithms computation
shape = str(mat.shape[0])+"_"+str(mat.shape[1])
bu_index_file = mat_file+"_bu_index_"+shape+".data"
bi_index_file = mat_file+"_bi_index_"+shape+".data"
if not (os.path.isfile(bu_index_file) and os.path.isfile(bi_index_file)):
#mat = io.loadmat(mat_file)['X']
"""mat = mat[1:5000,1:5000]
mat = mat[mat.getnnz(1)>0][:,mat.getnnz(0)>0]"""
print("Pre-processing...")
mat_nonzero = mat.nonzero()
"""cx = mat.tocoo()
bi_index = [[]]*mat.shape[0]
bu_index = [[]]*mat.shape[1]
for i,j,v in zip(cx.row, cx.col, cx.data):
bi_index[i].append(j)
bu_index[j].append(i)
print(bi_index[0])"""
print(" make bi indexes...")
bi_index = []
for k, g in groupby(zip(mat_nonzero[0], mat_nonzero[1]), itemgetter(0)):
to_add = list(map(lambda x:int(x[1]), list(g)))
bi_index.append(to_add)
print(" make bu indexes...")
bu_index = []
indexes = np.argsort(mat_nonzero[1])
for k, g in groupby(zip(mat_nonzero[1][indexes], mat_nonzero[0][indexes]), itemgetter(0)):
to_add = list(map(lambda x:int(x[1]), list(g)))
bu_index.append(to_add)
with open(bi_index_file, "wb") as fp:
pickle.dump(bi_index, fp)
with open(bu_index_file, "wb") as fp:
pickle.dump(bu_index, fp)
else:
with open(bi_index_file, "rb") as fp:
bi_index = pickle.load(fp)
with open(bu_index_file, "rb") as fp:
bu_index = pickle.load(fp)
print("Pre-processing done.")
return bu_index, bi_index