from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from rl.agents.cem import CEMAgent
from rl.memory import EpisodeParameterMemory

from env import GameEnv

# Used only to name the saved-weights file at the bottom of the script.
ENV_NAME = 'GameEnv'

# Get the environment and extract the number of actions.
env = GameEnv(shape=(5, 5))
nb_actions = env.action_space.n
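# Note: GameEnv is defined in the project-local env.py and is not shown here.
# This script only assumes it follows the usual Gym conventions that keras-rl
# relies on: a discrete action_space, an observation_space with a fixed shape,
# and the standard reset()/step() methods driven during fit()/test(). The
# shape=(5, 5) argument is passed through to GameEnv unchanged.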

# Build the policy network: a small fully connected net that maps the
# flattened observation to a softmax over the available actions.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))
model.summary()
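
# How CEM uses this model: the cross-entropy method is gradient-free. The agent
# keeps a sampling distribution (roughly, a per-weight mean and variance) over
# the network's flattened weights, draws a candidate weight vector, scores it by
# the reward of the episode it plays, and periodically refits the distribution
# to the best-performing fraction of candidates (elite_frac below). The softmax
# head simply turns an observation into action probabilities for that rollout.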

# Configure and compile the agent. Unlike keras-rl's gradient-based agents,
# CEMAgent.compile() takes no optimizer or metrics: CEM improves the weights
# by sampling and selection rather than by backpropagation.
memory = EpisodeParameterMemory(limit=1000, window_length=1)
cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
               batch_size=50, nb_steps_warmup=2000, train_interval=50,
               elite_frac=0.05)
cem.compile()
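
# The choice of memory matters here: EpisodeParameterMemory does not store
# individual transitions the way replay memories do; it records the sampled
# parameter sets together with the total reward of the episode they achieved,
# which is exactly what the elite selection described above needs. (Based on
# keras-rl's documented behaviour; worth checking against the installed version.)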

# Now it's time to learn something! Rendering during training slows it down
# considerably, so it is disabled here (visualize=False). Training can always
# be aborted safely with Ctrl + C.
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)

# After training, the weights could be saved (left commented out in this script).
# cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate the trained agent for 5 episodes with rendering enabled.
cem.test(env, nb_episodes=5, visualize=True)
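
# Optional follow-up sketch: if the save_weights call above were enabled, a
# later run could skip training by rebuilding the same model and agent,
# reloading the saved weights, and going straight to evaluation. The filename
# here is only an assumption tied to the commented-out save above.
# cem.load_weights('cem_{}_params.h5f'.format(ENV_NAME))
# cem.test(env, nb_episodes=5, visualize=True)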