-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathRecommenders.py
148 lines (78 loc) · 4.64 KB
/
Recommenders.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import numpy as np
import pandas
class item_similarity_recommender_py():
def __init__(self):
self.train_data = None
self.user_id = None
self.item_id = None
self.cooccurence_matrix = None
self.songs_dict = None
self.rev_songs_dict = None
self.item_similarity_recommendations = None
def get_user_items(self, user):
user_data = self.train_data[self.train_data[self.user_id] == user]
user_items = list(user_data[self.item_id].unique())
return user_items
def get_item_users(self, item):
item_data = self.train_data[self.train_data[self.item_id] == item]
item_users = set(item_data[self.user_id].unique())
return item_users
def get_all_items_train_data(self):
all_items = list(self.train_data[self.item_id].unique())
return all_items
def construct_cooccurence_matrix(self, user_songs, all_songs):
user_songs_users = []
for i in range(0, len(user_songs)):
user_songs_users.append(self.get_item_users(user_songs[i]))
cooccurence_matrix = np.matrix(np.zeros(shape=(len(user_songs), len(all_songs))), float)
for i in range(0,len(all_songs)):
songs_i_data = self.train_data[self.train_data[self.item_id] == all_songs[i]]
users_i = set(songs_i_data[self.user_id].unique())
for j in range(0,len(user_songs)):
users_j = user_songs_users[j]
users_intersection = users_i.intersection(users_j)
if len(users_intersection) != 0:
users_union = users_i.union(users_j)
cooccurence_matrix[j,i] = float(len(users_intersection))/float(len(users_union))
else:
cooccurence_matrix[j,i] = 0
return cooccurence_matrix
def generate_top_recommendations(self, user, cooccurence_matrix, all_songs, user_songs):
print("Non zero values in cooccurence_matrix :%d" % np.count_nonzero(cooccurence_matrix))
user_sim_scores = cooccurence_matrix.sum(axis=0)/float(cooccurence_matrix.shape[0])
user_sim_scores = np.array(user_sim_scores)[0].tolist()
sort_index = sorted(((e,i) for i,e in enumerate(list(user_sim_scores))), reverse=True)
columns = ['user_id', 'song', 'score', 'rank']
df = pandas.DataFrame(columns=columns)
rank = 1
for i in range(0,len(sort_index)):
if ~np.isnan(sort_index[i][0]) and all_songs[sort_index[i][1]] not in user_songs and rank <= 10:
df.loc[len(df)]=[user,all_songs[sort_index[i][1]],sort_index[i][0],rank]
rank = rank+1
if df.shape[0] == 0:
print("The current user has no songs for training the item similarity based recommendation model.")
return -1
else:
global df5
df5= ['NAME'] + df['song'].tolist()
return df
def create(self, train_data, user_id, item_id):
self.train_data = train_data
self.user_id = user_id
self.item_id = item_id
def recommend(self, user):
user_songs = self.get_user_items(user)
print("No. of unique songs for the user: %d" % len(user_songs))
all_songs = self.get_all_items_train_data()
print("no. of unique songs in the training set: %d" % len(all_songs))
cooccurence_matrix = self.construct_cooccurence_matrix(user_songs, all_songs)
df_recommendations = self.generate_top_recommendations(user, cooccurence_matrix, all_songs, user_songs)
return df_recommendations
def get_similar_items(self, item_list):
user_songs = item_list
all_songs = self.get_all_items_train_data()
print("no. of unique songs in the training set: %d" % len(all_songs))
cooccurence_matrix = self.construct_cooccurence_matrix(user_songs, all_songs)
user = ""
df_recommendations = self.generate_top_recommendations(user, cooccurence_matrix, all_songs, user_songs)
return df_recommendations