trainer.py
import csv
import tensorflow as tf
import numpy
from dialog import Dialog
from model import Encoder, Decoder
# 1. Load Data
print("<<<< Loading Data >>>>")
dialog_list = Dialog.resolve_data("./data/movie_conversations.txt", "./data/movie_lines.txt", vocab_size=2000)
Dialog.load_word2ids()
dialog_x, dialog_y = Dialog.pair_qa_ids(dialog_list)
data_X = tf.data.Dataset.from_tensor_slices(dialog_x)
data_Y = tf.data.Dataset.from_tensor_slices(dialog_y)
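# data_Y_shifted is the decoder input for teacher forcing: each reply is shifted
# right by one position (start token prepended, last token dropped), so the decoder
# learns to predict token t from the tokens before it.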
data_Y_shifted = data_Y.map(lambda sentence: tf.concat([tf.constant([Dialog.word2id("TSTSTARTTST")]), sentence[:-1]], 0))
print("Amount of observations: " + str(len(dialog_x)))
# 2. Specify Deep Learning Parameters
print("<<<< Specifying Hyper Parameters >>>>")
# hyperparameters are kept inline rather than saved to disk, since the data should not change them significantly
embedding_dim = 32
vocab_size = len(Dialog.all_tokens) + 3
sequence_len = Dialog.max_dialog_len
batch_size = 32
encoder_hidden_size = 64
epoch = 3000
save_interval = 10
learning_rate = 0.005
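# Zip (question, reply, shifted reply) into one dataset, shuffle across the full
# corpus, and drop the last partial batch so every batch matches the fixed
# batch_size the encoder and decoder expect.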
data_set = tf.data.Dataset.zip((data_X, data_Y, data_Y_shifted))
data_set = data_set.shuffle(buffer_size=len(dialog_x)).batch(batch_size, drop_remainder=True)
print("Vocabulary Size is :" + str(vocab_size))
# 3. Building Model
# using a random embedding for now; after testing, we will try pre-trained word2vec
print("<<<< Building Model >>>>")
def write_table(table, file):
    with open(file, 'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(table)
def read_table(file):
    try:
        with open(file, 'r') as csvfile:
            reader = csv.reader(csvfile)
            table = [[float(e) for e in r] for r in reader]
            table = tf.cast(numpy.asarray(table), dtype=tf.dtypes.float32)
            print("Read Saved Embedding")
            return table
    except Exception:
        return None
embedding = read_table("random_embedding.csv")
if embedding is None or len(embedding) < vocab_size:
    print("Rebuild Embedding")
    embedding = tf.random.normal(shape=[vocab_size, embedding_dim])
    write_table(embedding.numpy(), "random_embedding.csv")
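# Seq2seq setup: the encoder consumes a batch of question id sequences and returns
# its per-step outputs plus its final hidden state; the decoder is conditioned on
# that hidden state (and, via its attention module, on the encoder outputs) to
# generate the reply.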
encoder = Encoder(batch_size=batch_size, units=encoder_hidden_size, embedding=embedding)
# <encoder_output> dimension: [batch_size, sequence_len, encoder_units]; passed to the decoder's attention during training
# <encoder_hidden> dimension: [batch_size, encoder_units]; This is the THOUGHT VECTOR
decoder = Decoder(batch_size=batch_size, encoder_hidden_size=encoder_hidden_size,
                  embedding_dim=embedding_dim, embedding=embedding, vocab_size=vocab_size)
# <decoder_output> dimension: [batch_size, sequence_len, encoder_units]
# <decoder_hidden> dimension: [batch_size, encoder_units]
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
cross_entropy = tf.keras.losses.CategoricalCrossentropy()
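# CategoricalCrossentropy expects dense one-hot targets, which is why train() below
# one-hot encodes decoder_target; SparseCategoricalCrossentropy on the raw ids would
# be an equivalent, more memory-friendly alternative.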
try:
    encoder.load_weights('encoder_weights_saving')
    decoder.load_weights('decoder_weights_saving')
except Exception:
    print("No previously saved weights found. Starting new training .....")
with tf.Graph().as_default():
    print('executing_eagerly: {}'.format(tf.executing_eagerly()))
logdir = 'logs/func/'
writer = tf.summary.create_file_writer(logdir)
tf.summary.trace_on(graph=True, profiler=True)
def train(encoder_input, decoder_input, decoder_target):
    loss = 0.0
    with tf.GradientTape() as tape:
        enc_output, enc_hidden = encoder(inputs=encoder_input)
        decoder_predict = decoder(inputs=decoder_input, initial_state=enc_hidden, encoder_output=enc_output)
        decoder_target = tf.one_hot(decoder_target, vocab_size, axis=-1)
        decoder_predict = tf.cast(decoder_predict, tf.dtypes.float32)
        decoder_target = tf.cast(decoder_target, tf.dtypes.float32)
        loss += cross_entropy(y_true=decoder_target, y_pred=decoder_predict)
    print("... batch cross entropy loss: " + str(loss.numpy()))
    variables = encoder.trainable_variables + decoder.trainable_variables + decoder.attention.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return loss.numpy()
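# Greedy decoding: run the encoder on a single prompt, let the decoder emit a full
# sequence of vocabulary distributions, and take the argmax at every position.
# batch_size and training are temporarily toggled because the layers were built for
# batched, teacher-forced training.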
def predict(input_string):
    encoder.batch_size = 1
    decoder.batch_size = 1
    decoder.training = False
    input_batch = []
    sent_ids = Dialog.sent2id(input_string)
    input_batch.append(sent_ids)
    input_np = numpy.asarray(input_batch)
    enc_output, enc_hidden = encoder(inputs=input_np)
    dec_output = decoder(inputs=None, initial_state=enc_hidden)
    output_word_ids = tf.math.argmax(dec_output, axis=2)
    encoder.batch_size = batch_size
    decoder.batch_size = batch_size
    decoder.training = True
    reply = Dialog.id2sent(output_word_ids.numpy()[0])
    reply = reply.replace("TSTEOSTST", " ]")
    reply = reply.replace("TSTSTARTTST", "[ ")
    return reply
# 4. Run Model
print("<<<< Start Training >>>>")
for step in range(epoch):
    with writer.as_default():
        tf.summary.trace_export(
            name="my_func_trace",
            step=0,
            profiler_outdir=logdir)
    total_loss = 0.0
    num_batches = 0
    for index, (X, Y, Y_shifted) in enumerate(data_set):
        batch_loss = train(X, Y_shifted, Y)
        total_loss += batch_loss
        num_batches += 1
    data_set = tf.data.Dataset.zip((data_X, data_Y, data_Y_shifted))
    data_set = data_set.shuffle(buffer_size=len(dialog_x)).batch(batch_size, drop_remainder=True)
    print("Step: " + str(step) + ", Total Loss: " + str(total_loss))
    print("--- Me: How are you doing, my dear?")
    print("--- Computer: " + predict("How are you doing, my dear?"))
    if step % save_interval == 0:
        encoder.save_weights('encoder_weights_saving')
        decoder.save_weights('decoder_weights_saving')
        print("Model weights saved ....")