-
Notifications
You must be signed in to change notification settings - Fork 1
/
duel.py
92 lines (82 loc) · 3.12 KB
/
duel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import time
from game import Game
import math
def duel(ui, agent0, agent1, n_episodes, rng):
    """Play a series of games between two agents and display a running tally.

    A fresh ``Game`` is created for every episode. After each game the
    cumulative scores and the final board are shown, followed by a two
    second pause.

    Args:
        ui: Front-end object; must provide ``show_new_game``,
            ``show_scores`` and ``show_final_state``.
        agent0: First agent handed to ``Game``.
        agent1: Second agent handed to ``Game``.
        n_episodes: Number of games to play.
        rng: Random source forwarded to ``Game``.

    Returns:
        None. The tally is only displayed through ``ui``.
    """
    tally = [0, 0]
    for _ in range(n_episodes):
        ui.show_new_game()
        match = Game(ui, agent0, agent1, rng)
        state, winner, winning_fields = match.play()

        # A game without a winner leaves the tally untouched.
        if winner is not None:
            # Slot 0 belongs to whoever holds the first assigned marker.
            slot = 0 if winner == match.assigned_markers[0] else 1
            tally[slot] += 1

        ui.show_scores(tally)
        ui.show_final_state(match.board, state, winner, winning_fields)
        # Leave the final position on screen before the next game starts.
        time.sleep(2.0)
def duel_with_training(ui, agent0, agent1, n_episodes, rng):
    """Play ``n_episodes`` games, updating every player's policy after each.

    Before each game ``agent0.epsilon`` is set to
    ``1.0 - 0.9 * episode / n_episodes``, i.e. it starts at 1.0 and shrinks
    linearly towards (without reaching) 0.1.

    Rewards passed to ``update_policy``:
        * draw: 0.0 for every player,
        * otherwise: 1.0 for the player whose marker equals ``winner``,
          -1.0 for every other player.

    Args:
        ui: Front-end object forwarded to ``Game``.
        agent0: First agent; its ``epsilon`` attribute is overwritten here.
        agent1: Second agent.
        n_episodes: Number of games to play.
        rng: Random source forwarded to ``Game``.

    Returns:
        A list with one float per game: 0.0 for a draw, 1.0 when the winner
        holds ``assigned_markers[0]``, and -1.0 otherwise.
    """
    outcomes = []
    for episode in range(n_episodes):
        agent0.epsilon = 1.0 - 0.9 * episode / n_episodes
        match = Game(ui, agent0, agent1, rng)
        state, winner, _winning_fields = match.play()

        if state == Game.GameState.DRAW:
            outcomes.append(0.0)
            for player in match.players:
                player.update_policy(0.0)
            continue

        # Any non-draw result is scored from the first marker's perspective.
        outcomes.append(1.0 if winner == match.assigned_markers[0] else -1.0)
        for player in match.players:
            player.update_policy(1.0 if winner == player.marker else -1.0)
    return outcomes
def duel_manual_against_improving_agent(ui, agent0, agent1, policies, rng):
    """Let a player face a bot that gets stronger each time it is beaten.

    ``agent1`` starts with ``policies[0]``. Whenever the holder of
    ``assigned_markers[0]`` wins, that side's score goes up and the bot
    either loads the next entry of ``policies`` or, once the list is
    exhausted, has its policy updated with a reward of -1.0. When the other
    side wins, the scores and the bot level are reset and a "You lost"
    notice is shown. A game without a winner changes nothing.

    The loop has no exit condition; the function only ends if an exception
    propagates out of it.

    Args:
        ui: Front-end object; must provide ``show_scores``, ``write``,
            ``show_image``, ``show_new_game``, ``show_final_state`` and
            ``warn``.
        agent0: First agent handed to ``Game`` (labelled "You" in the UI).
        agent1: Bot agent; must provide ``load_policy``.
        policies: Indexable collection of policies, weakest level first
            (presumably pretrained -- confirm with the caller). Must not be
            empty.
        rng: Random source forwarded to ``Game``.
    """
    tally = [0, 0]
    ui.show_scores(tally)
    ui.write("You", "-PLAYER0_TEXT-")
    ui.show_image(level_bot_images(1), "-PLAYER1_IMG-")
    stage = 0
    agent1.load_policy(policies[stage])

    while True:
        ui.show_image(level_bot_images(stage + 1), "-PLAYER1_IMG-")
        if stage != 0:
            ui.write("", "-TITLE_TEXT-")
        else:
            ui.show_new_game()
        ui.write(f"Bot v{stage + 1:.1f}", "-PLAYER1_TEXT-")

        match = Game(ui, agent0, agent1, rng)
        state, winner, winning_fields = match.play()

        first_marker_won = (
            winner is not None and winner == match.assigned_markers[0]
        )
        if first_marker_won:
            tally[0] += 1
            if stage < len(policies) - 1:
                # A stronger stored policy is still available: switch to it.
                stage += 1
                agent1.load_policy(policies[stage])
            else:
                # No stored policy left: penalise the bot for this loss.
                for player in match.players:
                    if player.marker == match.assigned_markers[1]:
                        print("training")
                        player.update_policy(-1.0)
        elif winner is not None:
            # The other side won: start over from the first level.
            tally = [0, 0]
            stage = 0
            agent1.load_policy(policies[stage])

        ui.show_scores(tally)
        ui.show_final_state(match.board, state, winner, winning_fields)
        time.sleep(2.0)

        if winner == match.assigned_markers[1]:
            ui.warn("You lost. End of game.")
            time.sleep(5.0)
            ui.warn("")
def level_bot_images(level):
    """Return the relative path of the bot avatar image for ``level``.

    Two consecutive levels share one image: levels 0-1 map to ``bot1.png``,
    levels 2-3 to ``bot2.png``, and so on. The image index is clamped to the
    range 1..5, so levels of 8 and above all use ``bot5.png`` and negative
    levels fall back to ``bot1.png`` instead of producing a zero or negative
    image index.

    Args:
        level: Integer bot level.

    Returns:
        A path string of the form ``"./img/bot<N>.png"`` with ``N`` in 1..5.
    """
    image_index = max(1, min(level // 2 + 1, 5))
    return f"./img/bot{image_index}.png"