-
Notifications
You must be signed in to change notification settings - Fork 67
/
Copy pathsentiment.py
122 lines (94 loc) · 2.33 KB
/
sentiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# coding: utf-8
# In[7]:
from lstm import *
from sklearn.svm import SVC
# In[2]:
# Build the model wrapper used later by fpro().
# NOTE(review): the meaning of the False flag is defined in lstm.py — confirm there.
lst = lstm(False)

# Load the dataset. The file is opened in a `with` block so the handle is
# closed as soon as unpickling finishes instead of being leaked.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load "kaggle.p" from a trusted source.
with open("kaggle.p", 'rb') as _kaggle_file:
    train = pickle.load(_kaggle_file)
# In[8]:
def prepare_data2(data):
    """Turn a batch of (sentence, label) records into padded tokens, mask and labels.

    Args:
        data: sequence of records; element 0 of each record is a
            whitespace-separated sentence string and element 1 is its label.

    Returns:
        A tuple ``(emb1, mas1, y2)``: the padded token array and the mask
        returned by ``getmtr`` for the batch, and the list of labels in the
        same order as ``data``.
    """
    sentences = [record[0] for record in data]
    labels = [record[1] for record in data]

    # Every sentence in the batch is padded to the longest token count.
    token_counts = [len(sentence.split()) for sentence in sentences]
    longest = numpy.max(token_counts)

    padded_tokens, mask = getmtr(sentences, longest)
    return padded_tokens, mask, labels
def getmtr(xa, maxlen):
    """Tokenise sentences into a padded token matrix and a matching mask.

    Args:
        xa: sequence of whitespace-separated sentence strings.
        maxlen: number of token slots per sentence; shorter sentences are
            padded with the token ``','``.

    Returns:
        A tuple ``(tokens, x_mask)``. ``tokens`` is ``np.array`` built from
        the padded token lists, one row per sentence. ``x_mask`` is a float32
        array of shape ``(maxlen, len(xa))`` that holds 1.0 at ``[j][i]`` when
        sentence ``i`` has a real token at position ``j`` and 0.0 otherwise.

    Raises:
        IndexError: if a sentence has more than ``maxlen`` tokens.
    """
    x_mask = numpy.zeros((maxlen, len(xa)), dtype=np.float32)
    padded_rows = []
    for column, sentence in enumerate(xa):
        words = sentence.split()
        # Mark the real (non-padding) positions of this sentence.
        for row in range(len(words)):
            x_mask[row, column] = 1.0
        # Fill the remaining slots; the range is empty for full-length rows.
        words.extend(',' for _ in range(len(words), maxlen))
        padded_rows.append(words)
    return np.array(padded_rows), x_mask
def fpro(mydata, batch_size=64):
    """Run every record through the LSTM projection and collect the outputs.

    The data is processed in consecutive mini-batches. Each batch is
    tokenised and padded by ``prepare_data2``, every padded sentence is
    passed through ``embed``, and the stacked result is fed to
    ``lst.f_proj11`` together with the mask.

    Args:
        mydata: sliceable sequence of (sentence, label) records, as accepted
            by ``prepare_data2``.
        batch_size: number of records per mini-batch. Defaults to 64, the
            value that was previously hard-coded.

    Returns:
        A tuple ``(px, yx)`` of ``np.array`` objects: the per-sample outputs
        of ``lst.f_proj11`` and the matching labels, both in input order.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # NOTE(review): presumably switches off dropout noise for inference —
    # confirm against the definition of use_noise in lstm.py.
    use_noise.set_value(0.)

    projections = []
    labels = []
    for start in range(0, len(mydata), batch_size):
        # Slicing clamps at the end of the data, so the last batch may be short.
        batch = mydata[start:start + batch_size]
        tokens, mask, batch_labels = prepare_data2(batch)

        embedded = [embed(sentence_tokens) for sentence_tokens in tokens]
        # dstack puts the samples on the last axis; swapping axes 1 and 2
        # moves them to the middle axis.
        # NOTE(review): assumes embed() returns a 2-D (maxlen, dim) array so
        # the result is (maxlen, batch, dim) — confirm.
        stacked = np.dstack(embedded)
        emb1 = np.swapaxes(stacked, 1, 2)
        pred = lst.f_proj11(emb1, mask)

        # pred is indexed per sample along its first axis.
        for k in range(len(batch)):
            labels.append(batch_labels[k])
            projections.append(pred[k])

    px = np.array(projections)
    yx = np.array(labels)
    return px, yx
def getacc(vlc, xl, yl):
    """Return the accuracy of classifier ``vlc`` on ``xl`` as a percentage.

    Args:
        vlc: object exposing ``predict(xl)`` that returns one predicted
            label per sample.
        xl: samples passed unchanged to ``vlc.predict``.
        yl: expected labels, one per sample (list or array).

    Returns:
        The share of predictions equal to the expected label, scaled to the
        range 0-100.
    """
    predicted = np.asarray(vlc.predict(xl))
    expected = np.asarray(yl)
    # Both sides are coerced to arrays so that == is always element-wise;
    # comparing two plain lists with == would collapse to a single bool and
    # report a wrong accuracy.
    return np.mean(expected == predicted) * 100.0
# In[29]:
# Shuffle the dataset in place ten times before splitting it.
# NOTE(review): `shuffle` arrives via `from lstm import *`; presumably
# random.shuffle — confirm. The repeat count is kept as in the original.
for i in range(0, 10):
    shuffle(train)
# In[30]:
# Project every sample with the LSTM, then use the first 70% of the samples
# for training and the remainder for cross-validation.
xdat, ydat = fpro(train)
c7 = int(0.7 * len(xdat))
xtr, ytr = xdat[:c7], ydat[:c7]
xcr, ycr = xdat[c7:], ydat[c7:]
# In[31]:
clf = SVC(C=100, gamma=3.1, kernel='rbf')
# xtr: training data
# xcr: cross validation data
# In[32]:
scl = clf.fit(xtr, ytr)
# A single pre-formatted argument prints the same text on Python 2 and 3.
print("Training accuracy: %s" % (getacc(scl, xtr, ytr),))
print("Cross validation accuracy: %s" % (getacc(scl, xcr, ycr),))
# In[ ]: