-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathintentsModule.py
67 lines (55 loc) · 2.03 KB
/
intentsModule.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import pandas
import sklearn
from sklearn.feature_extraction.text import TfidfVectorizer
import nltk
import string
import numpy as np
import random
from re import sub
from os.path import join, dirname
from standarizeCSV import readAndReWrite
def _getFormattedPlainText(datainput):
text = ""
for data in datainput:
text += str(data) + "\n"
text = text.lower()
text = sub('[' + string.punctuation + ']', '', text)
return text
# Return the action (intent) whose training input best matches a user string.
def getAnswer(inputString, threshold=1):
    """Return the action whose training input is most similar to ``inputString``.

    The ``Input`` column of ``standarizedIntents.csv`` (next to this module)
    and the query are vectorized together with TF-IDF; the query is compared
    to every training row by the dot product of their TF-IDF vectors.

    Args:
        inputString: The user text to classify.
        threshold: Minimum similarity the best match must reach. A tiny
            tolerance is applied so that exact matches whose score rounds to
            0.999... are still accepted with the default of 1.

    Returns:
        ``["message"]`` when there is no training data or the best score is
        below ``threshold``. Otherwise the ``Action`` value of one of the
        best-scoring rows, split on ``" - "``. Among tied rows, actions that
        do not contain ``" - "`` are preferred; ties are broken at random.
    """
    csvPath = join(dirname(__file__), "standarizedIntents.csv")
    try:
        data = pandas.read_csv(csvPath)
    except IOError:
        # NOTE(review): readAndReWrite() presumably (re)generates the
        # standardized CSV -- confirm in standarizeCSV.
        readAndReWrite()
        data = pandas.read_csv(csvPath)

    # Normalize the query with the same pattern _getFormattedPlainText uses
    # (the backslash in string.punctuation escapes the following ']', so
    # backslashes are kept on both sides -- keep the two in sync).
    inputString = inputString.lower()
    inputString = sub('[' + string.punctuation + ']', '', inputString)
    # Documents are separated by newlines below; collapse all whitespace so a
    # multi-line query stays a single document instead of adding extra rows.
    inputString = " ".join(inputString.split())

    # NOTE(review): rows are aligned with ``data`` only if no Input value
    # contains a newline -- verify against the CSV produced by standarizeCSV.
    trainDocs = _getFormattedPlainText(data.Input)
    allDocs = (trainDocs + "\n" + inputString).split('\n')

    vectorizer = TfidfVectorizer(lowercase=True, stop_words='english', analyzer='word')
    tfidf = vectorizer.fit_transform(allDocs)

    # Only the query (last) row is needed, so avoid building the full dense
    # pairwise matrix. ``.dot``/``.toarray()`` are the explicit spellings and
    # do not rely on ``*`` meaning matrix product or on the ``.A`` shorthand.
    similarities = tfidf[-1].dot(tfidf.T).toarray()[0]

    # Keep only the scores of real data rows: this drops the blank separator
    # row and the query's own row, which have no entry in ``data.Action``.
    scores = similarities[:len(data)]
    if scores.size == 0:
        return ["message"]

    tolerance = 1e-9  # absorbs floating-point rounding in the similarity
    bestScore = scores.max()
    if bestScore < threshold - tolerance:
        return ["message"]

    candidates = np.where(scores == bestScore)[0]
    # str() guards against non-string cells (e.g. NaN or numbers).
    singleActions = [
        data.Action[i] for i in candidates
        if " - " not in str(data.Action[i])
    ]
    if singleActions:
        for action in singleActions:
            # Diagnostic output kept from the original implementation;
            # written as a call so it is valid on Python 2 and Python 3.
            print(action)
        return str(random.choice(singleActions)).split(" - ")

    chosen = np.random.choice(candidates)
    return str(data.Action[chosen]).split(" - ")