eval.py
import numpy as np

from environment.mujoco_env import MazeEnv
from models.agent import DDPGAgent
# The position of the actual goal in the maze.
_GOAL_POS = np.array([0.0, 16.0]).reshape(1, 2)
def eval_policy(agent: DDPGAgent, env: MazeEnv, eval_iter: int, episode_count: int, timestep_count: int, render: bool = False) -> None:
    '''
    Evaluates the performance of a policy and prints out a summary of the results.

    Parameters
    ----------
    agent: DDPGAgent
        The policy/agent to evaluate.
    env: MazeEnv
        The environment where the policy is evaluated.
    eval_iter: int
        The number of iterations that the policy is evaluated for.
    episode_count: int
        The number of episodes in each evaluation iteration.
    timestep_count: int
        The maximum number of timesteps the agent has to reach the goal during an episode.
    render: bool, optional
        If set to True, the environment will be rendered during each iteration. Default False.
    '''
    assert eval_iter > 0, f"The amount of evaluation iterations must be at least 1, got {eval_iter}"
    header = f"{'evaluation':^16s}|{'episodes':^16s}|{'goals reached':^16s}|{'goals reached %':^16s}|{'avg timestep':^16s}"
    delim = f"{16*'-'}|{16*'-'}|{16*'-'}|{16*'-'}|{16*'-'}"
    # The percentage is pre-formatted as a string so the '%' sign does not break
    # the 16-character column alignment.
    values = "{:^16d}|{:^16d}|{:^16d}|{:^16s}|{:^16.2f}"
    print(f"{header}\n{delim}")
    goals_reached = np.zeros(episode_count, dtype=np.int32)
    ts_counts = np.zeros(episode_count)
    # Switch the environment to evaluation mode and pin the evaluation goal.
    env.eval = True
    env.goals = _GOAL_POS
    for i in range(eval_iter):
        # Reset the per-iteration statistics.
        goals_reached.fill(0)
        ts_counts.fill(0.0)
        for ep in range(episode_count):
            state = env.reset()
            for ts in range(timestep_count):
                # Act greedily (no exploration noise) during evaluation.
                action = agent.act(state, use_noise=False)
                # Advance the environment and track the new observation;
                # otherwise the agent would keep acting on the initial state.
                state, *_, done = env.step(action)
                if render:
                    env.render()
                if done:
                    ts_counts[ep] = ts
                    goals_reached[ep] = 1
                    break
        reached_count = np.count_nonzero(goals_reached)
        reached_idx = np.nonzero(goals_reached)
        # Average timesteps only over episodes where the goal was reached;
        # guard against the empty-slice NaN warning when none were.
        avg_ts = np.mean(ts_counts[reached_idx]) if reached_count > 0 else float("nan")
        print(values.format(i, episode_count, reached_count, f"{reached_count / episode_count:.2%}", avg_ts))
    print("Evaluation done!")