-
Notifications
You must be signed in to change notification settings - Fork 0
/
resources.py
executable file
·204 lines (182 loc) · 6.89 KB
/
resources.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
"""
Helper functions to assist with recommendation engine
"""
import pandas as pd
import numpy as np
from scipy import sparse
from lightfm import LightFM
from surprise import Reader, Dataset, KNNBasic
from sklearn.metrics.pairwise import cosine_similarity
def create_interaction_matrix(
    df, user_col, item_col, rating_col, norm=False, threshold=None
):
    """
    Builds a user-item interaction matrix from a long-format ratings table
    Arguments:
        df = Pandas dataframe with one row per user-item interaction
        user_col = column name containing the user identifier
        item_col = column name containing the item identifier
        rating_col = column name containing the rating / interaction value
        norm = if True, binarize the matrix using threshold
        threshold = cut-off used when norm is True; values strictly above it
                    become 1, everything else becomes 0
    Returns:
        interactions = DataFrame indexed by user_col with one column per item;
                       values of repeated user-item pairs are summed and
                       missing pairs are filled with 0
    Raises:
        TypeError if norm is True and threshold is None
    """
    if norm and threshold is None:
        # Comparing against None would fail anyway; fail with a message that
        # tells the caller what is missing.
        raise TypeError("threshold must be a number when norm=True")
    interactions = (
        df.groupby([user_col, item_col])[rating_col]
        .sum()
        .unstack()
        .reset_index()
        .fillna(0)
        .set_index(user_col)
    )
    if norm:
        # Vectorized comparison instead of the element-wise applymap, which
        # is deprecated in recent pandas releases.
        interactions = (interactions > threshold).astype(int)
    return interactions
def create_user_dict(interactions):
    """
    Creates a user dictionary mapping each user to its row number in the interaction dataset
    Arguments:
        interactions - DataFrame with user-item interactions whose index holds the user ids
    Returns:
        user_dict - Dictionary containing user_id as key and interaction_index
                    (0-based row position) as value
    """
    return {
        user_id: position for position, user_id in enumerate(interactions.index)
    }
def create_item_dict(df, id_col, name_col):
    """
    Creates an item dictionary based on their item_id and item name
    Arguments:
        - df = Pandas dataframe containing item information
        - id_col = column name containing unique identifier for an item
        - name_col = column name containing name of the item
    Returns:
        item_dict = Dictionary containing item_id as key and item_name as value
    """
    # Pair the two columns row by row instead of looking rows up by the labels
    # 0..n-1, so the result does not depend on the dataframe's index
    # (e.g. after filtering or sorting).
    return dict(zip(df[id_col], df[name_col]))
def run_model(interactions, n_components=30, loss="warp", epoch=30, n_jobs=4):
    """
    Trains a LightFM matrix-factorization model on the interaction matrix
    Arguments:
        interactions = DataFrame containing user-item interactions
        n_components = number of embedding dimensions used for items and users
        loss = loss function passed to LightFM; other options are logistic, bpr
        epoch = number of training epochs
        n_jobs = number of threads used for fitting
    Returns:
        model = the fitted LightFM model
    """
    model = LightFM(loss=loss, no_components=n_components)
    # The DataFrame values are handed to LightFM as a CSR sparse matrix.
    interaction_csr = sparse.csr_matrix(interactions.values)
    model.fit(interaction_csr, num_threads=n_jobs, epochs=epoch)
    return model
def run_model_KNN(ratings, k):
    """
    Fits an item-based kNN model (cosine similarity) using Surprise
    Arguments:
        ratings = DataFrame of ratings in the layout expected by
                  Dataset.load_from_df (presumably user, item, rating
                  columns - confirm against the caller)
        k = number of neighbours passed to KNNBasic
    Returns:
        knn = KNNBasic model fitted on the full training set
    """
    # user_based=False makes the similarities item-item rather than user-user
    similarity_settings = {"name": "cosine", "user_based": False}
    trainset = Dataset.load_from_df(ratings, Reader()).build_full_trainset()
    model = KNNBasic(k, sim_options=similarity_settings)
    model.fit(trainset)
    return model
def get_recs(
    model,
    interactions,
    user_id,
    user_dict,
    item_dict,
    threshold=0,
    num_items=10,
    show_known=True,
    show_recs=True,
):
    """
    Produces user recommendations
    Arguments:
        model = Trained matrix factorization model exposing predict(user_index, item_indices)
        interactions = dataset used for training the model (users as index, items as columns)
        user_id = user ID for which we need to generate recommendation
        user_dict = Dictionary containing user_id as key and interaction_index as value
        item_dict = Dictionary containing item_id as key and item_name as value
        threshold = value above which the rating is favorable in interaction matrix
        num_items = Number of recommendations to provide
        show_known (optional) - if True, prints known positives
        show_recs (optional) - if True, prints the recommended items
    Returns:
        DataFrame with columns "item" (item name) and "score" (1-based rank of
        the recommendation, best first); it is populated regardless of the
        show_* flags, which only control printing
    Raises:
        KeyError if user_id is missing from user_dict / interactions or an
        item id is missing from item_dict
    """
    n_items = interactions.shape[1]
    # Translate the external user id into the row position the model was trained on
    user_x = user_dict[user_id]
    # Predict a score for every item and label the scores with the item ids
    scores = pd.Series(
        model.predict(user_x, np.arange(n_items)), index=interactions.columns
    )
    ranked_items = list(scores.sort_values(ascending=False).index)
    # Items the user already interacted with above the threshold
    user_row = interactions.loc[user_id, :]
    known_items = sorted(user_row[user_row > threshold].index, reverse=True)
    # Set membership keeps the filtering linear in the number of items
    known_lookup = set(known_items)
    top_items = [x for x in ranked_items if x not in known_lookup][:num_items]
    # Convert from item id to item name using item_dict
    known_names = [item_dict[x] for x in known_items]
    rec_names = [item_dict[x] for x in top_items]
    if show_known:
        print("Known Likes:")
        for counter, name in enumerate(known_names, start=1):
            print(f"{counter}- {name}")
    if show_recs:
        print("\n Recommended Items:")
        for counter, name in enumerate(rec_names, start=1):
            print(f"{counter}- {name}")
    # Build the result in one go; DataFrame.append no longer exists in
    # current pandas, and row-wise appends were quadratic anyway.
    return pd.DataFrame(
        {"item": rec_names, "score": list(range(1, len(rec_names) + 1))},
        columns=["item", "score"],
    )
def create_item_emdedding_matrix(model, interactions):
    """
    Creates item-item similarity matrix from the model's item embeddings
    Arguments:
        model = trained matrix factorization model exposing item_embeddings
        interactions = dataset used for training the model; its columns
                       supply the item labels
    Returns:
        Pandas dataframe containing pairwise cosine similarities between
        items, labelled by item on both axes
    """
    item_labels = interactions.columns
    embeddings_csr = sparse.csr_matrix(model.item_embeddings)
    return pd.DataFrame(
        cosine_similarity(embeddings_csr),
        index=item_labels,
        columns=item_labels,
    )
def get_item_recs(item_emdedding_matrix, item_id, item_dict, n_items=10, show=True):
    """
    Function to create item-item recommendation
    Arguments:
        - item_emdedding_matrix = Pandas dataframe containing pairwise item
          similarity scores (higher means more similar), labelled by item id
          on both axes
        - item_id = item ID for which we need to generate recommended items
        - item_dict = Dictionary type input containing item_id as key and item_name as value
        - n_items = Number of items needed as an output
        - show = if True, prints the queried item and the recommended items
    Returns:
        - recommended_items = List of recommended item ids, most similar first
    """
    similarities = item_emdedding_matrix.loc[item_id, :]
    # Remove the queried item by label rather than assuming it sorts first:
    # with tied or zero self-similarity it may not be at position 0, and
    # skipping position 0 would then drop a genuine neighbour instead.
    recommended_items = list(
        similarities.drop(item_id, errors="ignore")
        .sort_values(ascending=False)
        .head(n_items)
        .index
    )
    if show:
        print("Интересующая игра: {0}".format(item_dict[item_id]))
        print("Схожие игры:")
        for counter, i in enumerate(recommended_items, start=1):
            print(str(counter) + " - " + item_dict[i])
    return recommended_items