# -*- coding: utf-8 -*-
# model.py (forked from ymgaq/Pyaq)
from board import *
import tensorflow.compat.v1 as tf
from tensorflow.core.protobuf import saver_pb2

# Placeholders and sessions require graph mode when running under TF2.
tf.disable_v2_behavior()

FILTER_CNT = 96   # channels per residual block (network width)
BLOCK_CNT = 6     # number of residual blocks (network depth)
w_wdt = 0.007     # stddev of the random-normal weight initializer
b_wdt = 0.015     # stddev of the random-normal bias initializer


class DualNetwork(object):
    """Policy/value network with a shared residual tower."""

    def get_variable(self, shape_, width_=0.007, name_="weight"):
        # Create a variable and register it in the "vars_train" collection
        # unless the enclosing scope is reusing existing variables.
        var = tf.get_variable(name_, shape=shape_,
                              initializer=tf.random_normal_initializer(
                                  mean=0, stddev=width_))
        if not tf.get_variable_scope().reuse:
            tf.add_to_collection("vars_train", var)
        return var

    def conv2d(self, x, w):
        # stride-1 convolution with SAME padding, so the BSIZE x BSIZE
        # spatial dimensions are preserved
        return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1],
                            padding='SAME', name="conv2d")

    def res_block(self, x, input_size, middle_size, output_size,
                  dr_block=1.0, scope_name="res"):
        # Two 3x3 convolutions with a skip connection. When channel counts
        # differ, the input is zero-padded or sliced to match.
        with tf.variable_scope(scope_name + "_0"):
            w0 = self.get_variable([3, 3, input_size, middle_size],
                                   w_wdt, name_="weight")
            b0 = self.get_variable([middle_size], b_wdt, name_="bias")
            conv0 = tf.nn.relu(self.conv2d(x, w0) + b0)
        with tf.variable_scope(scope_name + "_1"):
            w1 = self.get_variable([3, 3, middle_size, output_size],
                                   w_wdt, name_="weight")
            b1 = self.get_variable([output_size], b_wdt, name_="bias")
            # dr_block is a keep probability (1.0 = no dropout)
            conv1 = tf.nn.dropout(self.conv2d(conv0, w1) + b1, dr_block)
        if input_size == output_size:
            x_add = x
        elif input_size < output_size:
            x_add = tf.pad(x, [[0, 0], [0, 0], [0, 0],
                               [0, output_size - input_size]])
        else:
            x_add = tf.slice(x, [0, 0, 0, 0],
                             [-1, BSIZE, BSIZE, output_size])
        return tf.nn.relu(tf.add(conv1, x_add))
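
    # Shape flow through one block (illustrative; the stock board.py in
    # Pyaq uses a 9x9 board, so BSIZE = 9 there):
    #   x     : [-1, BSIZE, BSIZE, input_size]
    #   conv0 : [-1, BSIZE, BSIZE, middle_size]   after 3x3 conv + ReLU
    #   conv1 : [-1, BSIZE, BSIZE, output_size]   after 3x3 conv + dropout
    #   out   : [-1, BSIZE, BSIZE, output_size]   ReLU(conv1 + x_add)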

    def model(self, x, temp=1.0, dr=1.0):
        hi = []
        prev_h = tf.reshape(x, [-1, BSIZE, BSIZE, FEATURE_CNT])

        # residual tower of BLOCK_CNT blocks; the keep probability decays
        # linearly from 1 at the first block toward dr at the last
        for i in range(BLOCK_CNT):
            input_size = FEATURE_CNT if i == 0 else FILTER_CNT
            dr_block = 1 - (1 - dr) / BLOCK_CNT * i
            hi.append(self.res_block(prev_h, input_size, FILTER_CNT,
                                     FILTER_CNT, dr_block=dr_block,
                                     scope_name="res%d" % i))
            prev_h = hi[i]

        # policy head
        with tf.variable_scope('pfc'):
            # 1st layer: [-1, BSIZE, BSIZE, FILTER_CNT] => [-1, BSIZE**2 * 2]
            w_pfc0 = self.get_variable([1, 1, FILTER_CNT, 2],
                                       w_wdt, name_="weight0")
            b_pfc0 = self.get_variable([BSIZE, BSIZE, 2], b_wdt, name_="bias0")
            conv_pfc0 = tf.reshape(self.conv2d(hi[BLOCK_CNT - 1], w_pfc0)
                                   + b_pfc0, [-1, BVCNT * 2])
            # 2nd layer: [-1, BSIZE**2 * 2] => [-1, BSIZE**2 + 1]
            # (one output per board point, plus one for pass)
            w_pfc1 = self.get_variable([BVCNT * 2, BVCNT + 1],
                                       w_wdt, name_="weight1")
            b_pfc1 = self.get_variable([BVCNT + 1], b_wdt, name_="bias1")
            conv_pfc1 = tf.matmul(conv_pfc0, w_pfc1) + b_pfc1
            # divide by the softmax temperature, then apply softmax
            policy = tf.nn.softmax(tf.div(conv_pfc1, temp), name="policy")

        # value head
        with tf.variable_scope('vfc'):
            # 1st layer: [-1, BSIZE, BSIZE, FILTER_CNT] => [-1, BSIZE**2]
            w_vfc0 = self.get_variable([1, 1, FILTER_CNT, 1],
                                       w_wdt, name_="weight0")
            b_vfc0 = self.get_variable([BSIZE, BSIZE, 1], b_wdt, name_="bias0")
            conv_vfc0 = tf.reshape(self.conv2d(hi[BLOCK_CNT - 1], w_vfc0)
                                   + b_vfc0, [-1, BVCNT])
            # 2nd layer: [-1, BSIZE**2] => [-1, 256]
            w_vfc1 = self.get_variable([BVCNT, 256], w_wdt, name_="weight1")
            b_vfc1 = self.get_variable([256], b_wdt, name_="bias1")
            conv_vfc1 = tf.matmul(conv_vfc0, w_vfc1) + b_vfc1
            relu_vfc1 = tf.nn.relu(conv_vfc1)
            # 3rd layer: [-1, 256] => [-1, 1]
            w_vfc2 = self.get_variable([256, 1], w_wdt, name_="weight2")
            b_vfc2 = self.get_variable([1], b_wdt, name_="bias2")
            conv_vfc2 = tf.matmul(relu_vfc1, w_vfc2) + b_vfc2
            # squash to (-1, 1) with tanh
            value = tf.nn.tanh(tf.reshape(conv_vfc2, [-1]), name="value")

        return policy, value
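
    # Tensor summary for model():
    #   x      : [-1, BVCNT, FEATURE_CNT] input features (reshaped to NHWC)
    #   policy : [-1, BVCNT + 1] softmax over board points plus pass
    #   value  : [-1] tanh output in (-1, 1)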

    def create_sess(self, ckpt_path=""):
        # Build a session; restore trained variables from ckpt_path if
        # given, otherwise initialize everything from scratch.
        with tf.get_default_graph().as_default():
            sess_ = tf.Session(config=tf.ConfigProto(
                gpu_options=tf.GPUOptions(
                    per_process_gpu_memory_fraction=0.9,  # use at most 90% of GPU memory
                    allow_growth=True  # True: allocate as needed; False: grab everything up front
                ),
                allow_soft_placement=True, log_device_placement=False))
            vars_train = tf.get_collection("vars_train")
            v_to_init = list(set(tf.global_variables()) - set(vars_train))
            saver = tf.train.Saver(vars_train,
                                   write_version=saver_pb2.SaverDef.V2)
            if ckpt_path != "":
                # restore trained variables and initialize only the rest
                saver.restore(sess_, ckpt_path)
                sess_.run(tf.variables_initializer(v_to_init))
            else:
                sess_.run(tf.global_variables_initializer())
            return sess_

    def save_vars(self, sess_, ckpt_path="model.ckpt"):
        # Save only the variables registered in the "vars_train" collection.
        with tf.get_default_graph().as_default():
            vars_train = tf.get_collection("vars_train")
            saver = tf.train.Saver(vars_train,
                                   write_version=saver_pb2.SaverDef.V2)
            saver.save(sess_, ckpt_path)
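

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the upstream file). It assumes board.py
# defines BSIZE, BVCNT and FEATURE_CNT as imported above, and that no
# checkpoint exists yet, so all variables are freshly initialized.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import numpy as np

    dn = DualNetwork()
    x = tf.placeholder(tf.float32, [None, BVCNT, FEATURE_CNT], name="x")
    policy, value = dn.model(x)  # build the shared tower plus both heads
    sess = dn.create_sess()      # pass ckpt_path="..." to restore instead

    feed = {x: np.random.rand(2, BVCNT, FEATURE_CNT).astype(np.float32)}
    p, v = sess.run([policy, value], feed_dict=feed)
    print("policy:", p.shape)    # (2, BVCNT + 1)
    print("value:", v.shape)     # (2,)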