diff --git a/examples/experiment_eval.py b/examples/experiment_eval.py
index 82fe4bbd0..55e3db036 100644
--- a/examples/experiment_eval.py
+++ b/examples/experiment_eval.py
@@ -21,7 +21,7 @@
 )
 from sotopia.envs.evaluators import (
     EvaluationForTwoAgents,
-    ReachGoalLLMEvaluator,
+    EpisodeLLMEvaluator,
     RuleBasedTerminatedEvaluator,
     SotopiaDimensions,
 )
@@ -164,7 +164,7 @@ def _iterate_env_agent_combo_not_in_db(
             RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
         ],
         terminal_evaluators=[
-            ReachGoalLLMEvaluator(
+            EpisodeLLMEvaluator(
                 model_names["env"],
                 EvaluationForTwoAgents[evaluation_dimensions],  # type: ignore
                 # TODO check how to do type annotation
diff --git a/examples/experimental/sotopia_original_replica/llm_agent_sotopia.py b/examples/experimental/sotopia_original_replica/llm_agent_sotopia.py
index abe959294..46d299a13 100644
--- a/examples/experimental/sotopia_original_replica/llm_agent_sotopia.py
+++ b/examples/experimental/sotopia_original_replica/llm_agent_sotopia.py
@@ -1,11 +1,14 @@
 import logging
 import sys
+import json
 
 from rich.logging import RichHandler
 
 from aact import NodeFactory
 
 from sotopia.experimental.agents.base_agent import BaseAgent
 from sotopia.experimental.agents.datamodels import Observation, AgentAction
+from sotopia.database.persistent_profile import AgentProfile
+from typing import Any
 
 from sotopia.generation_utils import agenerate
 from sotopia.generation_utils.generate import StrOutputParser
@@ -33,11 +36,13 @@ def __init__(
         input_channels: list[str],
         output_channel: str,
         query_interval: int,
-        agent_name: str,
         node_name: str,
-        goal: str,
         model_name: str,
-        redis_url: str,
+        goal: str,
+        agent_name: str = "",
+        background: dict[str, Any] | None = None,
+        agent_pk: str | None = None,
+        redis_url: str = "redis://localhost:6379/0",
     ):
         super().__init__(
             [(input_channel, Observation) for input_channel in input_channels],
@@ -47,11 +52,35 @@ def __init__(
         )
         self.output_channel = output_channel
         self.query_interval = query_interval
-        self.count_ticks = 0
+        self.count_ticks: int = 0
         self.message_history: list[Observation] = []
-        self.name = agent_name
-        self.model_name = model_name
-        self.goal = goal
+        self.goal: str = goal
+        self.model_name: str = model_name
+        self.agent_profile_pk: str | None = agent_pk
+        self.name: str = agent_name
+        self.background: dict[str, Any] | None = background
+        self.awake: bool = False
+
+    def set_profile(self, use_pk_value: bool) -> None:
+        if not use_pk_value:
+            assert (
+                self.background is not None and self.name is not None
+            ), "Background and name must be provided"
+            if " " in self.name:
+                first_name, last_name = self.name.split(" ", 1)
+            else:
+                first_name = self.name
+                last_name = ""
+            profile = AgentProfile(
+                first_name=first_name, last_name=last_name, **self.background
+            )
+            profile.save()
+        else:
+            profile = AgentProfile.get(pk=self.agent_profile_pk)
+
+        self.agent_profile_pk = profile.pk
+        self.name = " ".join([profile.first_name, profile.last_name]).strip()
+        self.background = profile.model_dump()
 
     def _format_message_history(self, message_history: list[Observation]) -> str:
         ## TODO: akhatua Fix the mapping of action to be gramatically correct
@@ -59,11 +88,23 @@ def _format_message_history(self, message_history: list[Observation]) -> str:
 
     async def aact(self, obs: Observation) -> AgentAction:
         if obs.turn_number == -1:
+            if self.awake:
+                return AgentAction(
+                    agent_name=self.name,
+                    output_channel=self.output_channel,
+                    action_type="none",
+                    argument="",
+                )
+            args = json.loads(obs.last_turn)
+            self.set_profile(args["use_pk_value"])
+            self.awake = True
             return AgentAction(
                 agent_name=self.name,
                 output_channel=self.output_channel,
                 action_type="none",
-                argument=self.model_name,
+                argument=json.dumps(
+                    {"pk": self.agent_profile_pk, "model_name": self.model_name}
+                ),
             )
 
         self.message_history.append(obs)
diff --git a/examples/experimental/sotopia_original_replica/origin.toml b/examples/experimental/sotopia_original_replica/origin.toml
index 7bf225273..562f22fe7 100644
--- a/examples/experimental/sotopia_original_replica/origin.toml
+++ b/examples/experimental/sotopia_original_replica/origin.toml
@@ -1,5 +1,5 @@
 redis_url = "redis://localhost:6379/0"
-extra_modules = ["examples.experimental.sotopia_original_replica.llm_agent_sotopia", "examples.experimental.nodes.chat_print_node", "sotopia.experimental.agents.moderator"]
+extra_modules = ["examples.experimental.sotopia_original_replica.llm_agent_sotopia", "examples.experimental.nodes.chat_print_node", "sotopia.experimental.agents.moderator","sotopia.experimental.agents.evaluators"]
 
 
 [[nodes]]
@@ -9,11 +9,13 @@ node_class = "moderator"
 [nodes.node_args]
 output_channels = ["moderator:Jane", "moderator:Jack"]
 input_channels = ["Jane:moderator", "Jack:moderator"]
-agent_backgrounds = {"Jane" = "", "Jack" = ""}
+evaluator_channels = [["evaluator:moderator","moderator:evaluator"]]
 agent_mapping = {"moderator:Jane" = "Jane", "moderator:Jack" = "Jack"}
 scenario = "Two friends are sitting in a cafe and catching up with each other's lives."
-max_turns = 2
+max_turns = 3
 push_to_db = false
+evaluate_episode = true
+use_pk_value = false
 
 [[nodes]]
 node_name = "Jack"
@@ -26,6 +28,8 @@ output_channel = "Jack:moderator"
 goal = "Your goal is to borrow 5000 dollars from Jane."
 model_name = "gpt-4o-mini"
 agent_name = "Jack"
+background = {"occupation" = "construction worker"}
+agent_pk = ""
 
 
 [[nodes]]
@@ -39,6 +43,8 @@ input_channels = ["moderator:Jane"]
 goal = "Your goal is to help Jack however, you are in a finicial crisis yourself and can only afford to give him 500 dollars."
model_name = "gpt-4o-mini" agent_name = "Jane" +background = {"occupation" = "gardener"} +agent_pk = "" [[nodes]] node_name = "chat_print" @@ -50,3 +56,12 @@ node_class = "chat_print" [nodes.node_args] env_agents = ["Jack", "Jane"] + +[[nodes]] +node_name = "evaluator" +node_class = "evaluator" + +[nodes.node_args] +input_channels = ["moderator:evaluator"] +output_channels = ["evaluator:moderator"] +model_name = "gpt-4o-mini" diff --git a/examples/fix_missing_episodes.py b/examples/fix_missing_episodes.py index 8f999d2af..8eb27f355 100644 --- a/examples/fix_missing_episodes.py +++ b/examples/fix_missing_episodes.py @@ -20,7 +20,7 @@ ) from sotopia.envs.evaluators import ( EvaluationForTwoAgents, - ReachGoalLLMEvaluator, + EpisodeLLMEvaluator, RuleBasedTerminatedEvaluator, SotopiaDimensions, ) @@ -229,7 +229,7 @@ def yield_env_agent_combo( RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2), ], terminal_evaluators=[ - ReachGoalLLMEvaluator( + EpisodeLLMEvaluator( model_names["env"], EvaluationForTwoAgents[SotopiaDimensions], ), diff --git a/examples/fix_missing_episodes_with_tag.py b/examples/fix_missing_episodes_with_tag.py index 31b9ee8fd..312f5e648 100644 --- a/examples/fix_missing_episodes_with_tag.py +++ b/examples/fix_missing_episodes_with_tag.py @@ -36,7 +36,7 @@ ) from sotopia.envs.evaluators import ( EvaluationForTwoAgents, - ReachGoalLLMEvaluator, + EpisodeLLMEvaluator, RuleBasedTerminatedEvaluator, SotopiaDimensions, ) @@ -327,7 +327,7 @@ def yield_env_agent_combo( RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2), ], terminal_evaluators=[ - ReachGoalLLMEvaluator( + EpisodeLLMEvaluator( model_names["env"], EvaluationForTwoAgents[SotopiaDimensions], ), diff --git a/examples/use_custom_dimensions.py b/examples/use_custom_dimensions.py index 48c7a195e..576e2dfd5 100644 --- a/examples/use_custom_dimensions.py +++ b/examples/use_custom_dimensions.py @@ -7,7 +7,7 @@ from typing import Type, Union from redis_om import Migrator from sotopia.envs.evaluators import ( - ReachGoalLLMEvaluator, + EpisodeLLMEvaluator, EvaluationForTwoAgents, RuleBasedTerminatedEvaluator, ) @@ -152,7 +152,7 @@ def run_simple_sample_with_custom_samples( custom_dimensions, list_name="custom" ) evaluator = RuleBasedTerminatedEvaluator(max_turn_number=10, max_stale_turn=2) - terminal_evaluator = ReachGoalLLMEvaluator( + terminal_evaluator = EpisodeLLMEvaluator( model_name="gpt-4o-mini", response_format_class=EvaluationForTwoAgents[custom_dimensions_type], # type: ignore ) diff --git a/sotopia-chat/chat_server.py b/sotopia-chat/chat_server.py index e6134e908..aa6a68042 100644 --- a/sotopia-chat/chat_server.py +++ b/sotopia-chat/chat_server.py @@ -21,7 +21,7 @@ EnvironmentProfile, ) from sotopia.envs.evaluators import ( - ReachGoalLLMEvaluator, + EpisodeLLMEvaluator, RuleBasedTerminatedEvaluator, ) from sotopia.envs.parallel import ParallelSotopiaEnv @@ -64,7 +64,7 @@ async def _start_server_with_two_session_ids_and_agent_env_combo( RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2), ], terminal_evaluators=[ - ReachGoalLLMEvaluator("gpt-4", EvaluationForTwoAgents[SotopiaDimensions]), + EpisodeLLMEvaluator("gpt-4", EvaluationForTwoAgents[SotopiaDimensions]), ], ) random.shuffle(session_ids) @@ -97,7 +97,7 @@ async def _start_server_with_one_session_id_and_agent_env_combo( RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2), ], terminal_evaluators=[ - ReachGoalLLMEvaluator("gpt-4", EvaluationForTwoAgents[SotopiaDimensions]), + EpisodeLLMEvaluator("gpt-4", 
         ],
     )
diff --git a/sotopia/api/fastapi_server.py b/sotopia/api/fastapi_server.py
index f1a3220c6..7c6dcc02d 100644
--- a/sotopia/api/fastapi_server.py
+++ b/sotopia/api/fastapi_server.py
@@ -24,7 +24,7 @@
 from sotopia.envs.parallel import ParallelSotopiaEnv
 from sotopia.envs.evaluators import (
     RuleBasedTerminatedEvaluator,
-    ReachGoalLLMEvaluator,
+    EpisodeLLMEvaluator,
     EvaluationForTwoAgents,
     SotopiaDimensions,
 )
@@ -267,7 +267,7 @@ async def nonstreaming_simulation(
             ),
         ],
         "terminal_evaluators": [
-            ReachGoalLLMEvaluator(
+            EpisodeLLMEvaluator(
                 simulation_request.models[0],
                 EvaluationForTwoAgents[SotopiaDimensions],
             ),
diff --git a/sotopia/api/websocket_utils.py b/sotopia/api/websocket_utils.py
index 36cb20c5f..4463e542c 100644
--- a/sotopia/api/websocket_utils.py
+++ b/sotopia/api/websocket_utils.py
@@ -1,6 +1,6 @@
 from sotopia.envs.evaluators import (
     EvaluationForTwoAgents,
-    ReachGoalLLMEvaluator,
+    EpisodeLLMEvaluator,
     RuleBasedTerminatedEvaluator,
 )
 from sotopia.agents import Agents, LLMAgent
@@ -98,7 +98,7 @@ def get_env_agents(
             RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
         ],
         terminal_evaluators=[
-            ReachGoalLLMEvaluator(
+            EpisodeLLMEvaluator(
                 evaluator_model,
                 EvaluationForTwoAgents[evaluation_dimensions],  # type: ignore
             ),
diff --git a/sotopia/cli/benchmark/benchmark.py b/sotopia/cli/benchmark/benchmark.py
index 87da66e4e..4e9001812 100644
--- a/sotopia/cli/benchmark/benchmark.py
+++ b/sotopia/cli/benchmark/benchmark.py
@@ -26,7 +26,7 @@
 from sotopia.database.serialization import get_rewards_from_episode
 from sotopia.envs.evaluators import (
     EvaluationForTwoAgents,
-    ReachGoalLLMEvaluator,
+    EpisodeLLMEvaluator,
     RuleBasedTerminatedEvaluator,
     SotopiaDimensions,
 )
@@ -363,7 +363,7 @@ def _list_all_env_agent_combo_not_in_db(
             RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
         ],
         terminal_evaluators=[
-            ReachGoalLLMEvaluator(
+            EpisodeLLMEvaluator(
                 model_names["env"],
                 EvaluationForTwoAgents[SotopiaDimensions],
             ),
diff --git a/sotopia/database/logs.py b/sotopia/database/logs.py
index e05618f4b..da66c7fcf 100644
--- a/sotopia/database/logs.py
+++ b/sotopia/database/logs.py
@@ -27,7 +27,7 @@ class BaseEpisodeLog(BaseModel):
     tag: str | None = Field(index=True, default="")
     models: list[str] | None = Field(index=True, default=[])
     messages: list[list[tuple[str, str, str]]]  # Messages arranged by turn
-    reasoning: str
+    reasoning: str = Field(default="")
     rewards: list[tuple[float, dict[str, float]] | float]  # Rewards arranged by turn
     rewards_prompt: str
diff --git a/sotopia/envs/evaluators.py b/sotopia/envs/evaluators.py
index 709d0bdaa..23af686d9 100644
--- a/sotopia/envs/evaluators.py
+++ b/sotopia/envs/evaluators.py
@@ -257,7 +257,7 @@ async def __acall__(
         return self(turn_number, messages)
 
 
-class ReachGoalLLMEvaluator(Evaluator, Generic[T_eval_dim]):
+class EpisodeLLMEvaluator(Evaluator, Generic[T_eval_dim]):
     @beartype
     def __init__(
         self,
diff --git a/sotopia/experimental/agents/evaluators.py b/sotopia/experimental/agents/evaluators.py
new file mode 100644
index 000000000..d6ceaedaa
--- /dev/null
+++ b/sotopia/experimental/agents/evaluators.py
@@ -0,0 +1,106 @@
+import json
+
+from aact import NodeFactory
+from .base_agent import BaseAgent
+from .logs import EpisodeLog
+from .datamodels import AgentAction, Observation
+from sotopia.database.persistent_profile import AgentProfile
+
+from typing import Generic, TypeVar, Type, Any
+from pydantic import BaseModel, Field
+
+from sotopia.envs.evaluators import GoalDimension
+from sotopia.generation_utils.generate import agenerate
+from langchain.output_parsers import PydanticOutputParser
+
+
+default_reward_prompt = """
+{history}
+Based on previous interactions, evaluate how well each of the participants achieve their goals.
+Here are the list of agents:
+{agent_list}
+Please output your evaluation following the format:
+{format_instructions}
+"""
+
+T_eval_dim = TypeVar("T_eval_dim", bound=BaseModel)
+
+
+class EvaluationForMutiAgents(BaseModel, Generic[T_eval_dim]):
+    agents_evaluation: dict[str, T_eval_dim] = Field(
+        description="the evaluation for each agent, the key is the agent name,be sure to include every agent in the agent list, the value should follow the evaluation dimension format"
+    )
+
+
+@NodeFactory.register("evaluator")
+class Evaluator(BaseAgent[Observation, AgentAction]):
+    def __init__(
+        self,
+        node_name: str,
+        model_name: str,
+        input_channels: list[str],
+        output_channels: list[str],
+        redis_url: str,
+        reward_prompt: str = default_reward_prompt,
+        eval_dim_class: str = "GoalDimension",
+        temperature: float = 0.0,
+    ):
+        super().__init__(
+            input_channel_types=[
+                (input_channel, Observation) for input_channel in input_channels
+            ],
+            output_channel_types=[
+                (output_channel, AgentAction) for output_channel in output_channels
+            ],
+            node_name=node_name,
+            redis_url=redis_url,
+        )
+        self.output_channels = output_channels
+        self.model_name = model_name
+        self.reward_prompt = reward_prompt
+        self.temperature = temperature
+        if eval_dim_class == "GoalDimension":
+            self.response_format_class: Type[BaseModel] = EvaluationForMutiAgents[
+                GoalDimension
+            ]
+        else:
+            raise ValueError(
+                f"the eval_dim_class : {eval_dim_class} is not implemented"
+            )
+        # TODO: need a registry for the evaluation dimension class, so dimension can be initialized with a str
+
+    async def aact(self, content: Observation) -> AgentAction:
+        epilog = EpisodeLog(**json.loads(content.last_turn))
+
+        result = await self.aevaluate(epilog)
+        return AgentAction(
+            agent_name="evaluator",
+            output_channel=f"evaluator:{content.agent_name}",
+            action_type="speak",
+            argument=json.dumps(
+                {"reward": json.dumps(result), "reward_prompt": self.reward_prompt}
+            ),
+        )
+
+    async def aevaluate(self, episode: EpisodeLog) -> Any:
+        # TODO: below is a temporary implementation, needs to be replaced by using render_for_humans in EpisodeLog
+        history = "\n".join(
+            f"{msg[0][0]} said: {msg[0][2]}" for msg in episode.messages
+        )
+        agent_list = []
+        for pk in episode.agents:
+            agent = AgentProfile.get(pk)
+            name = agent.first_name + " " + agent.last_name
+            name = name.strip()
+            agent_list.append(name)
+
+        res: BaseModel = await agenerate(
+            model_name=self.model_name,
+            template=self.reward_prompt,
+            input_values=dict(history=history, agent_list=str(agent_list)),
+            output_parser=PydanticOutputParser[self.response_format_class](  # type: ignore[name-defined]
+                pydantic_object=self.response_format_class
+            ),
+            temperature=self.temperature,
+        )
+        return res.model_dump()["agents_evaluation"]
diff --git a/sotopia/experimental/agents/logs.py b/sotopia/experimental/agents/logs.py
new file mode 100644
index 000000000..4702f46cc
--- /dev/null
+++ b/sotopia/experimental/agents/logs.py
@@ -0,0 +1,8 @@
+from redis_om import JsonModel
+from sotopia.database.logs import BaseEpisodeLog
+from sotopia.database.persistent_profile import AgentProfile
+
+
+class EpisodeLog(BaseEpisodeLog, JsonModel):
+    def render_for_humans(self) -> tuple[list[AgentProfile], list[str]]:
+        raise NotImplementedError
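
For reference, a minimal sketch (not part of this patch) of the payload round-trip implied by the new evaluator node: EvaluationForMutiAgents wraps one entry per agent, and Evaluator.aact double-encodes that result into AgentAction.argument. ScoreDimension below is a hypothetical stand-in for GoalDimension (whose real fields live in sotopia.envs.evaluators), and the agent names are illustrative:

    import json
    from typing import Generic, TypeVar
    from pydantic import BaseModel, Field

    T_eval_dim = TypeVar("T_eval_dim", bound=BaseModel)

    class ScoreDimension(BaseModel):  # hypothetical stand-in for GoalDimension
        goal: int = Field(description="0-10 score for how well the agent achieved its goal")

    class EvaluationForMutiAgents(BaseModel, Generic[T_eval_dim]):
        agents_evaluation: dict[str, T_eval_dim]

    # What Evaluator.aact packs into AgentAction.argument:
    result = {"Jack Smith": {"goal": 7}, "Jane Doe": {"goal": 9}}  # aevaluate() output
    argument = json.dumps({"reward": json.dumps(result), "reward_prompt": "..."})

    # A consumer decodes twice: once for the envelope, once for the reward payload.
    envelope = json.loads(argument)
    parsed = EvaluationForMutiAgents[ScoreDimension].model_validate(
        {"agents_evaluation": json.loads(envelope["reward"])}
    )
    print(parsed.agents_evaluation["Jane Doe"].goal)  # -> 9
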
diff --git a/sotopia/experimental/agents/moderator.py b/sotopia/experimental/agents/moderator.py
index ce57fb38b..5848a8480 100644
--- a/sotopia/experimental/agents/moderator.py
+++ b/sotopia/experimental/agents/moderator.py
@@ -1,22 +1,21 @@
 import asyncio
 import sys
-
+import json
 
 if sys.version_info < (3, 11):
     from typing_extensions import Self
 else:
     from typing import Self
 
-
 from aact import Message, NodeFactory, Node
 from aact.messages import DataModel, DataModelFactory
 
 from typing import Literal, Any, AsyncIterator
 from pydantic import Field
 
-from sotopia.database import EpisodeLog
 from .datamodels import AgentAction, Observation
 from sotopia.messages import ActionType
+from .logs import EpisodeLog
 
 
 @DataModelFactory.register("observations")
@@ -30,12 +29,13 @@ class Observations(DataModel):
 class Moderator(Node[AgentAction, Observation]):
     def __init__(
         self,
+        node_name: str,
         input_channels: list[str],
         output_channels: list[str],
         scenario: str,
         agent_mapping: dict[str, str],
-        node_name: str,
-        agent_backgrounds: dict[str, str],
+        evaluator_channels: list[list[str]] = [],
+        tag: str = "",
         redis_url: str = "redis://localhost:6379/0",
         action_order: Literal["simultaneous", "round-robin", "random"] = "round-robin",
         available_actions: list[ActionType] = [
@@ -47,11 +47,15 @@ def __init__(
         ],
         max_turns: int = 20,
         push_to_db: bool = False,
-    ):
+        use_pk_value: bool = False,
+        evaluate_episode: bool = False,
+    ) -> None:
+        print([(channel[0], AgentAction) for channel in evaluator_channels])
         super().__init__(
             input_channel_types=[
                 (input_channel, AgentAction) for input_channel in input_channels
-            ],
+            ]
+            + [(channel[0], AgentAction) for channel in evaluator_channels],
             output_channel_types=[
                 (output_channel, Observation) for output_channel in output_channels
             ],
@@ -62,6 +66,7 @@ def __init__(
         self.task_scheduler: asyncio.Task[None] | None = None
         self.shutdown_event: asyncio.Event = asyncio.Event()
         self.agent_mapping: dict[str, str] = agent_mapping
+        self.tag: str = tag
         self.action_order: Literal["simultaneous", "round-robin", "random"] = (
             action_order
         )
@@ -71,14 +76,20 @@
         self.current_agent_index: int = 0
         self.scenario: str = scenario
         self.agents: list[str] = list(agent_mapping.values())
+        self.agents_pk: dict[str, str] = {}
         self.agent_models: dict[str, str] = {}
         self.agents_awake: dict[str, bool] = {name: False for name in self.agents}
         self.all_agents_awake: asyncio.Event = asyncio.Event()
-        self.message_history: list[list[tuple[str, str, str]]] = [
-            [("Environment", "Environment", self.scenario)]
-        ]
-        self.push_to_db = push_to_db
-        self.agent_backgrounds = agent_backgrounds
+        self.evaluator_channels: list[list[str]] = evaluator_channels
+        self.push_to_db: bool = push_to_db
+        self.use_pk_value: bool = use_pk_value
+
+        self.evaluate_episode: bool = evaluate_episode
+        assert (not self.evaluate_episode) or len(
+            evaluator_channels
+        ) > 0, "if evaluate_episode is True, evaluator_channels should not be empty"
+
+        self.epilog: EpisodeLog  # will be initialized in booting process
 
         if self.action_order == "round-robin":
             pass
@@ -122,7 +133,7 @@ async def _task_scheduler(self) -> None:
         await self.all_agents_awake.wait()
         while not self.shutdown_event.is_set():
             observation = await self.observation_queue.get()
-            action_or_none = await self.aact(observation)
+            action_or_none = await self.astep(observation)
             if action_or_none is not None:
                 await self.send(action_or_none)
             self.observation_queue.task_done()
@@ -131,7 +142,7 @@ async def booting(self) -> None:
         """
         1. send checking message to agents for every 0.1 seconds, until all agents are awake
         - this message has turn_number of -1 for identification, agents should not record this into actual message_history
-        - if the agent booted succesfully, he is expected to return its model name for record.
+        - if the agent booted successfully, it is expected to return its agent_profile's pk for record.
         2. (under round-robin action order)after all agents are awake, send agent[0] a message to allow the agent to start speaking
         """
         while not self.all_agents_awake.is_set():
             await self.send(
                 Observations(
                     observations_map={
                         output_channel: Observation(
                             agent_name="moderator",
-                            last_turn=self.scenario,
+                            last_turn=json.dumps(
+                                {
+                                    "use_pk_value": self.use_pk_value,
+                                }
+                            ),
                             turn_number=-1,
                             available_actions=["none"],
                         )
@@ -148,21 +163,34 @@
                     }
                 )
             )
-            await asyncio.sleep(0.1)
+            await asyncio.sleep(0.2)
             while not self.observation_queue.empty():
                 agent_action = await self.observation_queue.get()
-                self.agents_awake[agent_action.agent_name] = True
-                self.agent_models[agent_action.agent_name] = agent_action.argument
+                if not self.agents_awake[agent_action.agent_name]:
+                    self.agents_awake[agent_action.agent_name] = True
+                    args: dict[str, Any] = json.loads(agent_action.argument)
+                    self.agents_pk[agent_action.agent_name] = args["pk"]
+                    self.agent_models[agent_action.agent_name] = args["model_name"]
             if False not in self.agents_awake.values():
                 self.all_agents_awake.set()
+                print("all agents are awake")
+                self.epilog = EpisodeLog(
+                    environment=self.scenario,
+                    agents=list(self.agents_pk.values()),
+                    tag=self.tag,
+                    models=list(self.agent_models.values()),
+                    messages=[[("Environment", "Environment", self.scenario)]],
+                    rewards=[0.0] * len(self.agents),
+                    rewards_prompt="",
+                )
 
         if self.action_order == "round-robin":
             await self.send(
                 Observations(
                     observations_map={
                         output_channel: Observation(
                             agent_name="moderator",
-                            last_turn=self.agent_backgrounds[agent_name],
+                            last_turn=self.scenario,
                             turn_number=0,
                             available_actions=self.available_actions
                             if agent_name == self.agents[0]
@@ -175,39 +203,67 @@
             self.current_agent_index += 1
 
     async def wrap_up_and_stop(self) -> None:
-        if self.push_to_db:
-            await self.save()
+        try:
+            await asyncio.sleep(0.1)
+            print("all agents have left, wrap up and stop")
+            self.shutdown_event.set()  # this will disable the task scheduler
+            if self.evaluate_episode:
+                epilog = await self.aeval(self.epilog)
+            if self.push_to_db:
+                epilog.save()
+        except Exception as e:
+            print(f"error in wrap_up_and_stop: {e}")
         await asyncio.sleep(0.5)
-        print("stopping all agents")
+        print("result of this episode:\n", self.epilog.model_dump_json())
         await self.r.publish(
-            f"shutdown:{self.node_name}",
+            "shutdown:moderator",
             "shutdown",
         )
 
-    async def save(self) -> EpisodeLog:
+    async def episode_log_to_messages(
+        self, epilog: EpisodeLog
+    ) -> list[tuple[str, str, str]]:
+        messages = []
+        for turn_number, turn in enumerate(epilog.messages):
+            for message in turn:
+                messages.append((message[0], message[1], message[2]))
+        return messages
+
+    async def aeval(self, epilog: EpisodeLog) -> EpisodeLog:
         """
-        save the EpisodeLog to redis, without evaluating
-        TODO: specify what to be added inside tag
-        TODO: update the code so that EpisodeLog.render_for_humans() can work
-            -currently it cannot work because no AgentProfile has been uploaded to redis
-            -such a process should be done back in the agents' end
-            -also the current agentslist is consist of names, but not uuid's of agents
+        evaluate the episode
+        will send the epilog to evaluators, and wait for the evaluation to be finished
         """
-        epilog = EpisodeLog(
-            environment=self.scenario,
-            agents=self.agents,
-            tag=None,
-            models=list(self.agent_models.values()),
-            messages=self.message_history,
-            reasoning="",
-            rewards=[0] * len(self.agents),
-            rewards_prompt="",
-        )
-        epilog.save()
-        # print(epilog.render_for_humans())
+        assert len(self.evaluator_channels) == 1, "currently only support one evaluator"
+
+        for evaluator_channel in self.evaluator_channels:
+            print(evaluator_channel[1])
+            await self.r.publish(
+                evaluator_channel[1],
+                Message[Observation](
+                    data=Observation(
+                        agent_name="moderator",
+                        last_turn=epilog.model_dump_json(),
+                        turn_number=self.turn_number,
+                        available_actions=self.available_actions,
+                    )
+                ).model_dump_json(),
+            )
+
+        print("episode eval started")
+
+        for _ in range(
+            len(self.evaluator_channels)
+        ):  # the queue will take in input and output from this channel
+            raw_res = await self.observation_queue.get()
+            res = json.loads(raw_res.argument)
+            epilog.rewards = res["reward"]
+            epilog.rewards_prompt = res["reward_prompt"]
+
+        print("episode eval finished")
         return epilog
 
-    async def aact(self, agent_action: AgentAction) -> Observations | None:
+    async def astep(self, agent_action: AgentAction) -> Observations | None:
         if agent_action.action_type == "leave":
             self.agents_awake[agent_action.agent_name] = False
             if True not in self.agents_awake.values():
@@ -216,24 +272,16 @@ async def aact(self, agent_action: AgentAction) -> Observations | None:
         if agent_action.action_type == "none":
             return None
 
-        if len(self.message_history) == 1:
-            self.message_history[0].append(
+        # message (sender, receivers (separated by comma), message content)
+        self.epilog.messages.append(
+            [
                 (
                     agent_action.agent_name,
                     "Environment",
                     agent_action.to_natural_language(),
                 )
-            )
-        else:
-            self.message_history.append(
-                [
-                    (
-                        agent_action.agent_name,
-                        "Environment",
-                        agent_action.to_natural_language(),
-                    )
-                ]
-            )
+            ]
+        )
 
         if self.turn_number < self.max_turns:
             self.turn_number += 1
@@ -266,5 +314,4 @@ async def aact(self, agent_action: AgentAction) -> Observations | None:
             )
             observations_map[output_channel] = observation
         self.current_agent_index = (self.current_agent_index + 1) % len(self.agents)
-
         return Observations(observations_map=observations_map)
diff --git a/sotopia/server.py b/sotopia/server.py
index ba88e9a7b..d838d6007 100644
--- a/sotopia/server.py
+++ b/sotopia/server.py
@@ -19,7 +19,7 @@
 from sotopia.envs import ParallelSotopiaEnv
 from sotopia.envs.evaluators import (
     EvaluationForTwoAgents,
-    ReachGoalLLMEvaluator,
+    EpisodeLLMEvaluator,
     RuleBasedTerminatedEvaluator,
     SotopiaDimensions,
     unweighted_aggregate_evaluate,
@@ -309,7 +309,7 @@ def get_agent_class(
             RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
         ],
         "terminal_evaluators": [
-            ReachGoalLLMEvaluator(
+            EpisodeLLMEvaluator(
                 model_dict["env"],
                 EvaluationForTwoAgents[SotopiaDimensions],
             ),
@@ -389,7 +389,7 @@ async def arun_one_script(
         env_message = [("Environment", script_background)]
         agent_messages = env_message + agent_messages
 
-    evaluator = ReachGoalLLMEvaluator(
+    evaluator = EpisodeLLMEvaluator(
         model_name="gpt-4",
         response_format_class=EvaluationForTwoAgents[SotopiaDimensions],
     )
@@ -460,7 +460,7 @@ async def aevaluate_one_episode(
     history = episode.rewards_prompt.replace("Prompt after formatting:", "").split(
         ",\nBased on previous interactions"
    )[0]
-    evaluator = ReachGoalLLMEvaluator(
+    evaluator = EpisodeLLMEvaluator(
         model_name=model,
         response_format_class=EvaluationForTwoAgents[SotopiaDimensions],
     )
diff --git a/tests/api/test_fastapi.py b/tests/api/test_fastapi.py
index 0a4f6c3c9..97b569d17 100644
--- a/tests/api/test_fastapi.py
+++ b/tests/api/test_fastapi.py
@@ -370,48 +370,3 @@ def test_websocket_simulate(create_mock_data: Callable[[], None]) -> None:
             "type": "FINISH_SIM",
         }
         websocket.send_json(end_msg)
-
-
-# def test_simulate(create_mock_data: Callable[[], None]) -> None:
-#     response = client.post(
-#         "/simulate",
-#         json={
-#             "env_id": "tmppk_env_profile",
-#             "agent_ids": ["tmppk_agent1", "tmppk_agent2"],
-#             "models": [
-#                 # "custom/llama3.2:1b@http://localhost:8000/v1",
-#                 # "custom/llama3.2:1b@http://localhost:8000/v1",
-#                 # "custom/llama3.2:1b@http://localhost:8000/v1"
-#                 "gpt-4o-mini",
-#                 "gpt-4o-mini",
-#                 "gpt-4o-mini",
-#             ],
-#             "max_turns": 2,
-#             "tag": "test_tag",
-#         },
-#     )
-#     assert response.status_code == 200
-#     assert isinstance(response.json(), str)
-#     max_retries = 20
-#     retry_count = 0
-#     while retry_count < max_retries:
-#         try:
-#             status = NonStreamingSimulationStatus.find(
-#                 NonStreamingSimulationStatus.episode_pk == response.json()
-#             ).all()[0]
-#             assert isinstance(status, NonStreamingSimulationStatus)
-#             print(status)
-#             if status.status == "Error":
-#                 raise Exception("Error running simulation")
-#             elif status.status == "Completed":
-#                 # EpisodeLog.get(response.json())
-#                 break
-#             # Status is "Started", keep polling
-#             time.sleep(1)
-#             retry_count += 1
-#         except Exception as e:
-#             print(f"Error checking simulation status: {e}")
-#             time.sleep(1)
-#             retry_count += 1
-#     else:
-#         raise TimeoutError("Simulation timed out after 10 retries")
diff --git a/tests/envs/test_evaluators.py b/tests/envs/test_evaluators.py
index a5285ea76..ee2381fb4 100644
--- a/tests/envs/test_evaluators.py
+++ b/tests/envs/test_evaluators.py
@@ -4,7 +4,7 @@
 
 from sotopia.envs.evaluators import (
     EvaluationForTwoAgents,
-    ReachGoalLLMEvaluator,
+    EpisodeLLMEvaluator,
     RuleBasedTerminatedEvaluator,
     unweighted_aggregate_evaluate,
 )
@@ -134,7 +134,7 @@ class _ReachGoal(BaseModel):
 
 @pytest.mark.asyncio
 async def test_reach_goal_llm_evaluator_async() -> None:
-    evaluator = ReachGoalLLMEvaluator(
+    evaluator = EpisodeLLMEvaluator(
         "custom/structured@http://localhost:8000/v1",
         response_format_class=EvaluationForTwoAgents[_ReachGoal],
     )
diff --git a/tests/integration/test_benchmark.py b/tests/integration/test_benchmark.py
index 23f97c8d1..6aa86c6cc 100644
--- a/tests/integration/test_benchmark.py
+++ b/tests/integration/test_benchmark.py
@@ -23,7 +23,7 @@
 )
 from sotopia.envs.evaluators import (
     EvaluationForTwoAgents,
-    ReachGoalLLMEvaluator,
+    EpisodeLLMEvaluator,
     RuleBasedTerminatedEvaluator,
     SotopiaDimensions,
 )
@@ -143,7 +143,7 @@ def compose_env_agent_combo(
         model_name="gpt-4o-mini",
         evaluators=[RuleBasedTerminatedEvaluator(max_turn_number=1, max_stale_turn=2)],
         terminal_evaluators=[
-            ReachGoalLLMEvaluator(
+            EpisodeLLMEvaluator(
                 "gpt-4o-mini",
                 EvaluationForTwoAgents[SotopiaDimensions],
             )
diff --git a/uv.lock b/uv.lock
index d2e999d49..025afafa7 100644
--- a/uv.lock
+++ b/uv.lock
@@ -3258,7 +3258,7 @@ wheels = [
 
 [[package]]
 name = "sotopia"
-version = "0.1.2"
+version = "0.1.3"
 source = { editable = "." }
 dependencies = [
     { name = "aact" },
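
Downstream of the ReachGoalLLMEvaluator -> EpisodeLLMEvaluator rename, call sites change only in the class name; a minimal sketch of the pattern used throughout the updated examples and servers above (the model name is a placeholder):

    from sotopia.envs.evaluators import (
        EpisodeLLMEvaluator,
        EvaluationForTwoAgents,
        RuleBasedTerminatedEvaluator,
        SotopiaDimensions,
    )

    # ReachGoalLLMEvaluator(...) becomes EpisodeLLMEvaluator(...); the arguments are unchanged.
    evaluators = [RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2)]
    terminal_evaluators = [
        EpisodeLLMEvaluator(
            "gpt-4o-mini",
            EvaluationForTwoAgents[SotopiaDimensions],
        )
    ]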