-
Notifications
You must be signed in to change notification settings - Fork 0
/
LinUCB.py
53 lines (46 loc) · 1.91 KB
/
LinUCB.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import numpy as np
from numpy.random import choice
class LinUCB(object):
    """LinUCB contextual bandit with disjoint linear models.

    Every article (arm) owns an independent pair of ridge-regression
    statistics stored in dictionaries keyed by article id:

    * ``A[id]`` -- an ``n x n`` matrix, initialised to the identity and
      incremented by ``x x^T`` for each observed context ``x``;
    * ``b[id]`` -- a length-``n`` vector, initialised to zeros and
      incremented by ``reward * x``.
    """

    def __init__(self, feature_size, confidence=1):
        """Create an agent.

        feature_size -- length ``n`` of every context vector.
        confidence   -- multiplier applied to the confidence bound; larger
                        values favour exploration.
        """
        self.n = feature_size
        self.A = {}
        self.b = {}
        self.c = confidence
        self.total_runs = 0
        self.total_reward = 0

    def _ensure_arm(self, article_id):
        """Create the identity/zero statistics for an article not seen before."""
        if article_id not in self.A:
            self.A[article_id] = np.eye(self.n)
            self.b[article_id] = np.zeros(self.n)

    def learn(self, context, aritcleID, reward):
        """Update the statistics of the chosen article.

        context   -- context vector with ``n`` elements (any array-like).
        aritcleID -- id of the article that was shown.
        reward    -- observed reward (e.g. 1 for a click, 0 otherwise).

        An article that has not been seen yet is initialised on the fly, so
        ``learn`` may be called before ``predict`` has encountered it.
        Raises ValueError if ``context`` does not hold exactly ``n`` elements.
        """
        # reshape() rejects a context whose size differs from n.
        x = np.asarray(context, dtype=float).reshape(self.n)
        self._ensure_arm(aritcleID)
        self.A[aritcleID] += np.outer(x, x)
        self.b[aritcleID] += reward * x

    def predict(self, context, pool):
        """Return the article id from ``pool`` with the highest UCB score.

        context -- context vector (user traits) of length ``n``.
        pool    -- sequence (list or ndarray) of candidate article ids.

        The score of an article is ``theta . x + c * sqrt(x^T A^-1 x)`` with
        ``theta = A^-1 b``. Ties are broken uniformly at random.
        Raises ValueError if ``pool`` is empty.
        """
        assert len(context) == self.n  # for debug
        if len(pool) == 0:
            raise ValueError("pool must contain at least one article id")
        x = np.asarray(context, dtype=float)
        scores = np.zeros(len(pool))
        # Score by position: comparing a plain list with `pool == id` yields a
        # single False, which would leave every score at zero.
        for position, article_id in enumerate(pool):
            self._ensure_arm(article_id)
            A = self.A[article_id]
            theta = np.linalg.solve(A, self.b[article_id])  # theta = inv(A) * b
            bound = self.c * np.sqrt(np.dot(x, np.linalg.solve(A, x)))
            scores[position] = np.dot(theta, x) + bound
        decision = choice(np.flatnonzero(scores == scores.max()))
        return pool[decision]

    def accuracy_update(self, reward):
        """Record one more run with ``reward`` and return the average reward so far."""
        self.total_runs += 1
        self.total_reward += reward
        return self.total_reward / self.total_runs
if __name__ == '__main__':
    # Smoke run: pick an article, feed back a few rewards, pick again.
    agent = LinUCB(feature_size=3, confidence=1)
    context = np.array([1, 2, 3])
    # The pool is an ndarray: an elementwise `pool == id` comparison on a
    # plain list would collapse to a single False instead of a mask.
    pool = np.array([111, 222, 333])
    print(agent.predict(context, pool))
    agent.learn(context, 111, 1)
    agent.learn(context, 222, 0)
    agent.learn(context, 111, 0)
    agent.learn(context, 333, 0)
    print(agent.predict(context, pool))