Translate text to image in Keras using GAN and Word2Vec as well as recurrent neural networks
The following models are implemented in [keras_text_to_image/library]
- dcgan.py: this version takes a very noisy input alongside the text input (half of the input is pure noise, while the other half is generated from the GloVe embedding of the input text)
- dcgan_v2.py: this version removes the noise input (the input is just the GloVe embedding of the input text)
- dcgan_v3.py: this version adds a configurable amount of noise as input, together with the GloVe embedding of the text input
The sample code below only generates very small images, but the image size can be increased if you have sufficient memory.
Below is sample code that trains the DCGan on a set of pokemon samples, each of which is an (image, text) pair:
import os
import sys
import numpy as np
from random import shuffle
def main():
    """Train the DCGan on the pokemon (image, text) pairs.

    Loads the normalized images and their text descriptions from
    ``data/pokemon`` next to this script, shuffles them, configures a
    ``DCGan`` for 32x32 RGB output and calls ``gan.fit``, passing
    ``models`` as the model directory and ``data/snapshots`` as the
    snapshot directory.
    """
    seed = 42
    # NOTE: this seeds NumPy only; shuffle() below draws from the stdlib
    # `random` generator, which is not seeded here.
    np.random.seed(seed)

    current_dir = os.path.dirname(__file__)
    # add the keras_text_to_image module to the system path
    sys.path.append(os.path.join(current_dir, '..'))
    # dirname() returns '' when the script is run from its own directory.
    # Compare by value: `is not ''` tests object identity, which relies on
    # string interning and raises a SyntaxWarning on Python 3.8+.
    current_dir = current_dir if current_dir != '' else '.'

    img_dir_path = current_dir + '/data/pokemon/img'
    txt_dir_path = current_dir + '/data/pokemon/txt'
    model_dir_path = current_dir + '/models'

    img_width = 32
    img_height = 32
    img_channels = 3

    # Imported here, not at module top, because the package only becomes
    # importable after the sys.path.append() above.
    from keras_text_to_image.library.dcgan import DCGan
    from keras_text_to_image.library.utility.img_cap_loader import load_normalized_img_and_its_text

    image_label_pairs = load_normalized_img_and_its_text(img_dir_path, txt_dir_path,
                                                         img_width=img_width,
                                                         img_height=img_height)
    shuffle(image_label_pairs)

    gan = DCGan()
    gan.img_width = img_width
    gan.img_height = img_height
    gan.img_channels = img_channels
    gan.random_input_dim = 200
    gan.glove_source_dir_path = './very_large_data'

    batch_size = 16
    epochs = 1000
    gan.fit(model_dir_path=model_dir_path, image_label_pairs=image_label_pairs,
            snapshot_dir_path=current_dir + '/data/snapshots',
            snapshot_interval=100,
            batch_size=batch_size,
            epochs=epochs)


if __name__ == '__main__':
    main()
Below is sample code showing how to load the trained DCGan model and generate 3 new pokemon samples from each text description of a pokemon:
import os
import sys
import numpy as np
from random import shuffle
def main():
    """Load the trained DCGan and generate images from text descriptions.

    For each of the first three (image, text) pairs (after shuffling), the
    original image is saved as ``<model_name>-generated-<i>-0.png`` and
    three images generated from the pair's text are saved with suffixes
    ``-1``, ``-2`` and ``-3``, all under ``data/outputs``.
    """
    seed = 42
    # NOTE: this seeds NumPy only; shuffle() below draws from the stdlib
    # `random` generator, which is not seeded here.
    np.random.seed(seed)

    current_dir = os.path.dirname(__file__)
    # add the keras_text_to_image module to the system path
    sys.path.append(os.path.join(current_dir, '..'))
    # dirname() returns '' when the script is run from its own directory.
    # Compare by value: `is not ''` tests object identity, which relies on
    # string interning and raises a SyntaxWarning on Python 3.8+.
    current_dir = current_dir if current_dir != '' else '.'

    img_dir_path = current_dir + '/data/pokemon/img'
    txt_dir_path = current_dir + '/data/pokemon/txt'
    model_dir_path = current_dir + '/models'

    img_width = 32
    img_height = 32

    # Imported here, not at module top, because the package only becomes
    # importable after the sys.path.append() above.
    from keras_text_to_image.library.dcgan import DCGan
    from keras_text_to_image.library.utility.image_utils import img_from_normalized_img
    from keras_text_to_image.library.utility.img_cap_loader import load_normalized_img_and_its_text

    image_label_pairs = load_normalized_img_and_its_text(img_dir_path, txt_dir_path,
                                                         img_width=img_width,
                                                         img_height=img_height)
    shuffle(image_label_pairs)

    gan = DCGan()
    gan.load_model(model_dir_path)

    output_prefix = current_dir + '/data/outputs/' + DCGan.model_name + '-generated-'
    for i in range(3):
        normalized_image, text = image_label_pairs[i][0], image_label_pairs[i][1]

        # Index 0 is reserved for the original (reference) image.
        image = img_from_normalized_img(normalized_image)
        image.save(output_prefix + str(i) + '-0.png')

        # Generated samples are numbered from 1 so the first one no longer
        # overwrites the reference image saved above.
        for j in range(1, 4):
            generated_image = gan.generate_image_from_text(text)
            generated_image.save(output_prefix + str(i) + '-' + str(j) + '.png')


if __name__ == '__main__':
    main()
- Step 1: Change tensorflow to tensorflow-gpu in requirements.txt and install tensorflow-gpu
- Step 2: Download and install the CUDA® Toolkit 9.0 (Please note that currently CUDA® Toolkit 9.1 is not yet supported by tensorflow, therefore you should download CUDA® Toolkit 9.0)
- Step 3: Download and unzip cuDNN 7.4 for CUDA® Toolkit 9.0, and add the bin folder of the unzipped directory to the $PATH of your Windows environment