-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathnewproj.py
76 lines (62 loc) · 2.08 KB
/
newproj.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from sklearn.neighbors import KDTree
import joblib
import json
import sys
import time
from scipy.sparse import data
import pandas as pd
import gensim
from nltk.stem import WordNetLemmatizer
# Wall-clock start time; the script prints the elapsed time when it finishes.
start = time.time()

# Query text for which similar projects are recommended.
combined = "BCD TO 7 Segment"

# Path of the cleaned dataset JSON file (despite the name, this is a file,
# not a directory).
# NOTE(review): the backslash separators make this path Windows-specific.
DATA_DIR = r'.\Results\datasetcleaned.json'

# Parse the dataset inside a context manager so the file handle is closed
# as soon as loading finishes instead of staying open for the whole run.
with open(DATA_DIR, 'r', encoding="utf8") as f1:
    dat = json.load(f1)

# Shared lemmatizer instance used by lemmatize_stemming().
wnl = WordNetLemmatizer()
def lemmatize_stemming(text):
    """Return ``text`` lemmatized by the module-level ``wnl`` lemmatizer.

    Despite the name, no stemming step is applied here; the value is passed
    straight to ``wnl.lemmatize`` and its result is returned unchanged.
    """
    return wnl.lemmatize(text)
def preprocess(text):
    """Tokenize ``text``, drop noise tokens, and return the lemmatized rest.

    Tokens come from ``gensim.utils.simple_preprocess``. A token is kept only
    if it is at least three characters long and is not in gensim's
    ``STOPWORDS`` set. Each kept token is lemmatized and followed by a single
    space, so a non-empty result always ends with a trailing space; an input
    with no surviving tokens yields the empty string.
    """
    stopwords = gensim.parsing.preprocessing.STOPWORDS
    pieces = [
        lemmatize_stemming(word) + " "
        for word in gensim.utils.simple_preprocess(text)
        if len(word) >= 3 and word not in stopwords
    ]
    return "".join(pieces)
# Build the single-element Series that is fed to the vectorizer.
# To include lemmatization, use the preprocessed query instead:
# data = preprocess(combined)
# data_words = pd.Series(data)

# Lemmatization is not included: the raw query text is used unchanged.
data_words = pd.Series(combined)

# Load the fitted models and push the query through them in order.
# NOTE(review): judging by the file names these are presumably a count
# vectorizer, an LDA model and a KD-tree - confirm against the training code.
# NOTE(review): joblib.load unpickles its input; only load trusted files.
loaded_vector = joblib.load(r'.\Models\CVfitted.pkl')
data_vectorized = loaded_vector.transform(data_words)
loaded_model = joblib.load(r'.\Models\ldafitted.pkl')
data_vectorized = loaded_model.transform(data_vectorized)
loaded_tree = joblib.load(r'.\Models\finaltree.pkl')

# Indices of the 50 nearest dataset entries to the query.
dist, ind = loaded_tree.query(data_vectorized, k=50)

print("Testing Circuit Text: {}".format(combined))
print()


def _print_recommendation(rank, project):
    """Print one numbered recommendation followed by a blank line.

    ``project`` is a dataset record; its "author_id" and "id" fields are
    used to build the project URL.
    """
    print("Recommendation {}".format(rank))
    print("https://circuitverse.org/users/{}/projects/{}".format(
        project["author_id"], project["id"]))
    print()


count = 0
split = 5
li = []

# Printing results: the first ``split - 1`` neighbours are shown in the order
# returned by the tree; the remaining ones are collected together with a
# popularity score (stars + views) so they can be re-ranked below.
for i in ind[0].tolist():
    count += 1
    if count < split:
        _print_recommendation(count, dat[i])
    else:
        li.append((dat[i]["star_count"] + dat[i]["view"], i))

# Continue the numbering after the directly printed neighbours and fill the
# remaining slots with the most popular of the collected candidates.
count = split - 1
li.sort(reverse=True)
num_of_recom = 10  # the total number of recommendations
for _popularity, project_index in li:
    count += 1
    if count > num_of_recom:
        break
    _print_recommendation(count, dat[project_index])

# Report the total elapsed wall-clock time in seconds.
end = time.time()
print(end - start)