parallel_tictactoe.py
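A simultaneous-move variant of Tic-Tac-Toe bundled with HandyRL. Unlike the turn-based environment it extends, every player submits an action on every step and the environment applies the action of one randomly chosen player; diff_info() and update() exchange the applied move as a compact string so that another copy of the environment can follow the game move by move.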
# Copyright (c) 2020 DeNA Co., Ltd.
# Licensed under The MIT License [see LICENSE for details]

# implementation of Parallel Tic-Tac-Toe

import random

import numpy as np

from .tictactoe import Environment as TicTacToe


class Environment(TicTacToe):
    def __str__(self):
        # header row of column labels, then one row of marks per board line
        s = '  ' + ' '.join(self.Y) + '\n'
        for i in range(3):
            s += self.X[i] + ' ' + ' '.join([self.C[self.board[i, j]] for j in range(3)]) + '\n'
        return s

    def step(self, actions):
        # state transition function
        # both players act simultaneously; one player's action is applied at random
        selected_player = random.choice(list(actions.keys()))
        action = actions[selected_player]
        self._step(action, selected_player)

    def _step(self, action, selected_player):
        selected_color = [self.BLACK, self.WHITE][selected_player]
        x, y = action // 3, action % 3
        self.board[x, y] = selected_color

        # check winning condition (row, column, and the two diagonals)
        if self.board[x, :].sum() == 3 * selected_color \
                or self.board[:, y].sum() == 3 * selected_color \
                or (x == y and np.diag(self.board, k=0).sum() == 3 * selected_color) \
                or (x == 2 - y and np.diag(self.board[::-1, :], k=0).sum() == 3 * selected_color):
            self.win_color = selected_color

        self.record.append((selected_color, action))

    def diff_info(self, _):
        # serialize the last applied move as "<position>:<mark>"
        if len(self.record) == 0:
            return ""
        color, action = self.record[-1]
        return self.action2str(action) + ":" + self.C[color]

    def update(self, info, reset):
        # rebuild the state from a diff string produced by diff_info()
        if reset:
            self.reset()
        else:
            saction, scolor = info.split(":")
            action, player = self.str2action(saction), 'OX'.index(scolor)
            self._step(action, player)

    def turn(self):
        # no single player to move in the simultaneous-move setting
        raise NotImplementedError()

    def turns(self):
        # every player acts on every step
        return self.players()


if __name__ == '__main__':
    e = Environment()
    for _ in range(100):
        e.reset()
        while not e.terminal():
            print(e)
            action_map = {}
            for p in e.turns():
                actions = e.legal_actions(p)
                print([e.action2str(a) for a in actions])
                action_map[p] = random.choice(actions)
            e.step(action_map)
        print(e)
        print(e.outcome())
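Beyond the self-play loop in __main__, a minimal sketch of the diff_info()/update() pair is shown below: one environment plays random simultaneous moves while a second mirrors the game purely from the per-step diff strings. The import path is an assumption based on HandyRL's package layout (handyrl/envs/); adjust it if the module lives elsewhere.

# Minimal sketch; the import path is an assumption, everything else uses
# only methods defined or exercised in this file.
import random

from handyrl.envs.parallel_tictactoe import Environment

src, dst = Environment(), Environment()
src.reset()
dst.update('', reset=True)  # the mirror starts from a fresh board
while not src.terminal():
    # every player proposes a move; step() applies one of them at random
    actions = {p: random.choice(src.legal_actions(p)) for p in src.turns()}
    src.step(actions)
    dst.update(src.diff_info(None), reset=False)  # replay only the applied move
assert str(src) == str(dst)  # the mirror tracks the source exactly
print(src)
print(src.outcome())

The sketch relies on nothing outside this file's interface; keeping a second environment consistent from diff strings in this way appears to be the intended role of diff_info() and update() in HandyRL.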