From 24442c7732d5aff6ee552152a3441b75b1d1c612 Mon Sep 17 00:00:00 2001
From: Jan Stenner
Date: Wed, 28 Jun 2023 17:48:09 +0200
Subject: [PATCH] terminated/truncated fix

---
 src/data_hook.jl        |  4 ++--
 src/multi_controller.jl | 23 +++++++++++++++++++++++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/src/data_hook.jl b/src/data_hook.jl
index 2f1bb6e2..c1a72f81 100644
--- a/src/data_hook.jl
+++ b/src/data_hook.jl
@@ -763,7 +763,7 @@ function (hook::DataHook)(::PostEpisodeStage, agent, env, training = false)
 
     if training
         if isa(agent, MultiController)
-            if length(hook.rewards) >= 1 && sum(hook.reward) > maximum(sum.(hook.rewards))
+            if length(hook.rewards) >= 1 && sum(hook.reward) > maximum(sum.(hook.rewards)) && env.steps >= env.maxsteps
                 if hook.is_inner_hook_RL
                     for name in hook.policy_names
                         if isa(agent.agents[name]["policy"], Agent)
@@ -775,7 +775,7 @@ function (hook::DataHook)(::PostEpisodeStage, agent, env, training = false)
                 hook.bestreward = sum(hook.reward)
             end
         else
-            if length(hook.rewards) >= 1 && hook.reward > maximum(hook.rewards)
+            if length(hook.rewards) >= 1 && hook.reward > maximum(hook.rewards) && env.steps >= env.maxsteps
                 hook.bestepisode = hook.ep
                 hook.bestreward = hook.reward
             end
diff --git a/src/multi_controller.jl b/src/multi_controller.jl
index 5904772a..409624c0 100644
--- a/src/multi_controller.jl
+++ b/src/multi_controller.jl
@@ -293,6 +293,29 @@ function DefaultDataHook(Multi_Agent, env)
     return hook
 end
 
+"""
+    RLBase.update!(trajectory, policy, env::ElectricGridEnv, ::PostActStage)
+
+Append the reward and the `terminal` flag of the current step to `trajectory`.
+
+`terminal` is stored as `true` only when `is_terminated(env)` holds while `env.steps` is
+still below `env.maxsteps`; an env that is just truncated is stored as `false`.
+"""
+function RLBase.update!(
+    trajectory::AbstractTrajectory,
+    policy::AbstractPolicy,
+    env::ElectricGridEnv,
+    ::PostActStage,
+)
+    step_reward = policy isa NamedPolicy ? reward(env, nameof(policy)) : reward(env)
+    push!(trajectory[:reward], step_reward)
+
+    # The step counter is only inspected once the env reports termination.
+    ended_early = is_terminated(env) && !(env.steps >= env.maxsteps)
+    return push!(trajectory[:terminal], ended_early)
+end
+
+
 """
 Wrapps the Run function form https://juliareinforcementlearning.org/ to enable turning off
 the action noise.